) accu.get();
+ intAccum.add(fileReader.getRowGroups().size());
+ }
+ }
+
+ // Pre-fetching
+ boolean preFetchEnabled =
+ conf.getBoolean(
+ CometConf.COMET_SCAN_PREFETCH_ENABLED().key(),
+ (boolean) CometConf.COMET_SCAN_PREFETCH_ENABLED().defaultValue().get());
+
+ if (preFetchEnabled) {
+ LOG.info("Prefetch enabled for BatchReader.");
+ this.prefetchQueue = new LinkedBlockingQueue<>();
+ }
+
+ isInitialized = true;
+ synchronized (this) {
+      // If prefetch is enabled, `init()` is called in a separate thread, so an asynchronous
+      // call to `BatchReader.nextBatch()` may find initialization still in progress. `nextBatch`
+      // must block until initialization of the `BatchReader` is done; now that it (almost) is,
+      // notify the waiting thread so `nextBatch` can continue.
+ notifyAll();
+ }
+ }
+
+ public void setSparkSchema(StructType schema) {
+ this.sparkSchema = schema;
+ }
+
+ public AbstractColumnReader[] getColumnReaders() {
+ return columnReaders;
+ }
+
+ @Override
+ public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
+ throws IOException, InterruptedException {
+ // Do nothing. The initialization work is done in 'init' already.
+ }
+
+ @Override
+ public boolean nextKeyValue() throws IOException {
+ return nextBatch();
+ }
+
+ @Override
+ public Void getCurrentKey() {
+ return null;
+ }
+
+ @Override
+ public ColumnarBatch getCurrentValue() {
+ return currentBatch();
+ }
+
+ @Override
+ public float getProgress() {
+ return (float) rowsRead / totalRowCount;
+ }
+
+ /**
+ * Returns the current columnar batch being read.
+ *
+ * Note that this must be called AFTER {@link BatchReader#nextBatch()}.
+ */
+ public ColumnarBatch currentBatch() {
+ return currentBatch;
+ }
+
+ // Only for testing
+  public Future<Option<Throwable>> getPrefetchTask() {
+ return this.prefetchTask;
+ }
+
+ // Only for testing
+  public LinkedBlockingQueue<Pair<PageReadStore, Long>> getPrefetchQueue() {
+ return this.prefetchQueue;
+ }
+
+ /**
+ * Loads the next batch of rows.
+ *
+   * @return true if there are more rows to read, false otherwise.
+ */
+ public boolean nextBatch() throws IOException {
+ if (this.prefetchTask == null) {
+ Preconditions.checkState(isInitialized, "init() should be called first!");
+ } else {
+ // If prefetch is enabled, this reader will be initialized asynchronously from a
+      // different thread. Wait until it is initialized.
+ while (!isInitialized) {
+ synchronized (this) {
+ try {
+            // Wait until initialization of the current `BatchReader` (i.e., `init()`) is done.
+            // It is possible that `init()` finishes right after we enter this while loop, so a
+            // short timeout is used.
+ wait(100);
+
+            // Check whether the prefetch task has finished; if so, rethrow any exception from it.
+ if (prefetchTask.isDone()) {
+              Option<Throwable> exception = prefetchTask.get();
+ if (exception.isDefined()) {
+ throw exception.get();
+ }
+ }
+ } catch (RuntimeException e) {
+          // Spark checks for certain exception types, e.g. `SchemaColumnConvertNotSupportedException`.
+ throw e;
+ } catch (Throwable e) {
+ throw new IOException(e);
+ }
+ }
+ }
+ }
+
+ if (rowsRead >= totalRowCount) return false;
+ boolean hasMore;
+
+ try {
+ hasMore = loadNextRowGroupIfNecessary();
+ } catch (RuntimeException e) {
+      // Spark checks for certain exception types, e.g. `SchemaColumnConvertNotSupportedException`.
+ throw e;
+ } catch (Throwable e) {
+ throw new IOException(e);
+ }
+
+ if (!hasMore) return false;
+ int batchSize = (int) Math.min(capacity, totalRowsLoaded - rowsRead);
+
+ return nextBatch(batchSize);
+ }
+
+ public boolean nextBatch(int batchSize) {
+ long totalDecodeTime = 0, totalLoadTime = 0;
+ for (int i = 0; i < columnReaders.length; i++) {
+ AbstractColumnReader reader = columnReaders[i];
+ long startNs = System.nanoTime();
+ reader.readBatch(batchSize);
+ totalDecodeTime += System.nanoTime() - startNs;
+ startNs = System.nanoTime();
+ vectors[i] = reader.currentBatch();
+ totalLoadTime += System.nanoTime() - startNs;
+ }
+
+ SQLMetric decodeMetric = metrics.get("ParquetNativeDecodeTime");
+ if (decodeMetric != null) {
+ decodeMetric.add(totalDecodeTime);
+ }
+ SQLMetric loadMetric = metrics.get("ParquetNativeLoadTime");
+ if (loadMetric != null) {
+ loadMetric.add(totalLoadTime);
+ }
+
+ currentBatch.setNumRows(batchSize);
+ rowsRead += batchSize;
+ return true;
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (columnReaders != null) {
+ for (AbstractColumnReader reader : columnReaders) {
+ if (reader != null) {
+ reader.close();
+ }
+ }
+ }
+ if (fileReader != null) {
+ fileReader.close();
+ fileReader = null;
+ }
+ }
+
+ private boolean loadNextRowGroupIfNecessary() throws Throwable {
+    // More rows can be read from the currently loaded row group; no need to load the next one.
+ if (rowsRead != totalRowsLoaded) return true;
+
+ SQLMetric rowGroupTimeMetric = metrics.get("ParquetLoadRowGroupTime");
+ SQLMetric numRowGroupsMetric = metrics.get("ParquetRowGroups");
+ long startNs = System.nanoTime();
+
+ PageReadStore rowGroupReader = null;
+ if (prefetchTask != null && prefetchQueue != null) {
+ // Wait for pre-fetch task to finish.
+      Pair<PageReadStore, Long> rowGroupReaderPair = prefetchQueue.take();
+ rowGroupReader = rowGroupReaderPair.getLeft();
+
+      // Update the incremental bytes-read metric. Because Spark maintains this metric in a
+      // thread-local variable, we need to update it manually here.
+ // TODO: We may expose metrics from `FileReader` and get from it directly.
+ long incBytesRead = rowGroupReaderPair.getRight();
+ FileSystem.getAllStatistics().stream()
+ .forEach(statistic -> statistic.incrementBytesRead(incBytesRead));
+ } else {
+ rowGroupReader = fileReader.readNextRowGroup();
+ }
+
+ if (rowGroupTimeMetric != null) {
+ rowGroupTimeMetric.add(System.nanoTime() - startNs);
+ }
+ if (rowGroupReader == null) {
+ return false;
+ }
+ if (numRowGroupsMetric != null) {
+ numRowGroupsMetric.add(1);
+ }
+
+    List<ColumnDescriptor> columns = requestedSchema.getColumns();
+ for (int i = 0; i < columns.size(); i++) {
+ if (missingColumns[i]) continue;
+ if (columnReaders[i] != null) columnReaders[i].close();
+ // TODO: handle tz, datetime & int96 rebase
+ // TODO: consider passing page reader via ctor - however we need to fix the shading issue
+ // from Iceberg side.
+ DataType dataType = sparkSchema.fields()[i].dataType();
+ ColumnReader reader =
+ Utils.getColumnReader(
+ dataType,
+ columns.get(i),
+ capacity,
+ useDecimal128,
+ useLazyMaterialization,
+ useLegacyDateTimestamp);
+ reader.setPageReader(rowGroupReader.getPageReader(columns.get(i)));
+ columnReaders[i] = reader;
+ }
+ totalRowsLoaded += rowGroupReader.getRowCount();
+ return true;
+ }
+
+ // Submits a prefetch task for this reader.
+ public void submitPrefetchTask(ExecutorService threadPool) {
+ this.prefetchTask = threadPool.submit(new PrefetchTask());
+ }
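+  // Illustrative usage sketch (not part of this class): how a caller might drive the reader when
+  // prefetch is enabled. The executor and loop below are hypothetical; Comet's scan operator
+  // wires this up through its own task lifecycle.
+  //
+  //   ExecutorService pool = Executors.newSingleThreadExecutor();
+  //   reader.submitPrefetchTask(pool);      // runs init() and row-group reads asynchronously
+  //   while (reader.nextBatch()) {          // blocks until init() completes, then loads batches
+  //     ColumnarBatch batch = reader.currentBatch();
+  //     // consume batch ...
+  //   }
+  //   reader.close();
+  //   pool.shutdown();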
+
+ // A task for prefetching parquet row groups.
+  private class PrefetchTask implements Callable<Option<Throwable>> {
+ private long getBytesRead() {
+ return FileSystem.getAllStatistics().stream()
+ .mapToLong(s -> s.getThreadStatistics().getBytesRead())
+ .sum();
+ }
+
+ @Override
+    public Option<Throwable> call() throws Exception {
+ // Gets the bytes read so far.
+ long baseline = getBytesRead();
+
+ try {
+ init();
+
+ while (true) {
+ PageReadStore rowGroupReader = fileReader.readNextRowGroup();
+
+ if (rowGroupReader == null) {
+ // Reaches the end of row groups.
+ return Option.empty();
+ } else {
+ long incBytesRead = getBytesRead() - baseline;
+
+ prefetchQueue.add(Pair.of(rowGroupReader, incBytesRead));
+ }
+ }
+ } catch (Throwable e) {
+        // Return the exception thrown from the reader; `nextBatch()` will rethrow it.
+ return Option.apply(e);
+ } finally {
+ if (fileReader != null) {
+ fileReader.closeStream();
+ }
+ }
+ }
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/BloomFilterReader.java b/common/src/main/java/org/apache/comet/parquet/BloomFilterReader.java
new file mode 100644
index 000000000..a23216c7f
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/BloomFilterReader.java
@@ -0,0 +1,253 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.parquet.column.values.bloomfilter.BlockSplitBloomFilter;
+import org.apache.parquet.column.values.bloomfilter.BloomFilter;
+import org.apache.parquet.crypto.AesCipher;
+import org.apache.parquet.crypto.InternalColumnDecryptionSetup;
+import org.apache.parquet.crypto.InternalFileDecryptor;
+import org.apache.parquet.crypto.ModuleCipherFactory;
+import org.apache.parquet.crypto.ParquetCryptoRuntimeException;
+import org.apache.parquet.filter2.predicate.FilterPredicate;
+import org.apache.parquet.filter2.predicate.Operators;
+import org.apache.parquet.filter2.predicate.UserDefinedPredicate;
+import org.apache.parquet.format.BlockCipher;
+import org.apache.parquet.format.BloomFilterHeader;
+import org.apache.parquet.format.Util;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
+import org.apache.parquet.hadoop.metadata.ColumnPath;
+import org.apache.parquet.io.SeekableInputStream;
+
+public class BloomFilterReader implements FilterPredicate.Visitor<Boolean> {
+ private static final Logger LOG = LoggerFactory.getLogger(BloomFilterReader.class);
+ private static final boolean BLOCK_MIGHT_MATCH = false;
+ private static final boolean BLOCK_CANNOT_MATCH = true;
+
+  private final Map<ColumnPath, ColumnChunkMetaData> columns;
+  private final Map<ColumnPath, BloomFilter> cache = new HashMap<>();
+ private final InternalFileDecryptor fileDecryptor;
+ private final SeekableInputStream inputStream;
+
+ BloomFilterReader(
+ BlockMetaData block, InternalFileDecryptor fileDecryptor, SeekableInputStream inputStream) {
+ this.columns = new HashMap<>();
+ for (ColumnChunkMetaData column : block.getColumns()) {
+ columns.put(column.getPath(), column);
+ }
+ this.fileDecryptor = fileDecryptor;
+ this.inputStream = inputStream;
+ }
+
+ @Override
+  public <T extends Comparable<T>> Boolean visit(Operators.Eq<T> eq) {
+ T value = eq.getValue();
+
+ if (value == null) {
+      // The Bloom filter bitset contains only non-null values, so it isn't helpful here. This
+      // could check the column stats, but the StatisticsFilter is responsible for that.
+ return BLOCK_MIGHT_MATCH;
+ }
+
+    Operators.Column<T> filterColumn = eq.getColumn();
+ ColumnChunkMetaData meta = columns.get(filterColumn.getColumnPath());
+ if (meta == null) {
+ // the column isn't in this file so all values are null, but the value
+ // must be non-null because of the above check.
+ return BLOCK_CANNOT_MATCH;
+ }
+
+ try {
+ BloomFilter bloomFilter = readBloomFilter(meta);
+ if (bloomFilter != null && !bloomFilter.findHash(bloomFilter.hash(value))) {
+ return BLOCK_CANNOT_MATCH;
+ }
+ } catch (RuntimeException e) {
+ LOG.warn(e.getMessage());
+ return BLOCK_MIGHT_MATCH;
+ }
+
+ return BLOCK_MIGHT_MATCH;
+ }
+
+ @Override
+  public <T extends Comparable<T>> Boolean visit(Operators.NotEq<T> notEq) {
+ return BLOCK_MIGHT_MATCH;
+ }
+
+ @Override
+  public <T extends Comparable<T>> Boolean visit(Operators.Lt<T> lt) {
+ return BLOCK_MIGHT_MATCH;
+ }
+
+ @Override
+  public <T extends Comparable<T>> Boolean visit(Operators.LtEq<T> ltEq) {
+ return BLOCK_MIGHT_MATCH;
+ }
+
+ @Override
+  public <T extends Comparable<T>> Boolean visit(Operators.Gt<T> gt) {
+ return BLOCK_MIGHT_MATCH;
+ }
+
+ @Override
+  public <T extends Comparable<T>> Boolean visit(Operators.GtEq<T> gtEq) {
+ return BLOCK_MIGHT_MATCH;
+ }
+
+ @Override
+ public Boolean visit(Operators.And and) {
+ return and.getLeft().accept(this) || and.getRight().accept(this);
+ }
+
+ @Override
+ public Boolean visit(Operators.Or or) {
+ return or.getLeft().accept(this) && or.getRight().accept(this);
+ }
+
+ @Override
+ public Boolean visit(Operators.Not not) {
+ throw new IllegalArgumentException(
+ "This predicate "
+ + not
+ + " contains a not! Did you forget"
+ + " to run this predicate through LogicalInverseRewriter?");
+ }
+
+ @Override
+  public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean visit(
+      Operators.UserDefined<T, U> udp) {
+ return visit(udp, false);
+ }
+
+ @Override
+  public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean visit(
+      Operators.LogicalNotUserDefined<T, U> udp) {
+ return visit(udp.getUserDefined(), true);
+ }
+
+  private <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean visit(
+      Operators.UserDefined<T, U> ud, boolean inverted) {
+ return BLOCK_MIGHT_MATCH;
+ }
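+  // Illustrative sketch (hypothetical caller, not part of this class): the visitor is applied to
+  // a pushed-down predicate per row group; a result of BLOCK_CANNOT_MATCH (true) means the row
+  // group can be skipped entirely.
+  //
+  //   FilterPredicate pred = FilterApi.eq(FilterApi.intColumn("id"), 42);
+  //   BloomFilterReader bloomReader = new BloomFilterReader(block, fileDecryptor, inputStream);
+  //   boolean canSkipRowGroup = pred.accept(bloomReader);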
+
+ BloomFilter readBloomFilter(ColumnChunkMetaData meta) {
+ if (cache.containsKey(meta.getPath())) {
+ return cache.get(meta.getPath());
+ }
+ try {
+ if (!cache.containsKey(meta.getPath())) {
+ BloomFilter bloomFilter = readBloomFilterInternal(meta);
+ if (bloomFilter == null) {
+ return null;
+ }
+
+ cache.put(meta.getPath(), bloomFilter);
+ }
+ return cache.get(meta.getPath());
+ } catch (IOException e) {
+ LOG.error("Failed to read Bloom filter data", e);
+ }
+
+ return null;
+ }
+
+ private BloomFilter readBloomFilterInternal(ColumnChunkMetaData meta) throws IOException {
+ long bloomFilterOffset = meta.getBloomFilterOffset();
+ if (bloomFilterOffset < 0) {
+ return null;
+ }
+
+ // Prepare to decrypt Bloom filter (for encrypted columns)
+ BlockCipher.Decryptor bloomFilterDecryptor = null;
+ byte[] bloomFilterHeaderAAD = null;
+ byte[] bloomFilterBitsetAAD = null;
+ if (null != fileDecryptor && !fileDecryptor.plaintextFile()) {
+ InternalColumnDecryptionSetup columnDecryptionSetup =
+ fileDecryptor.getColumnSetup(meta.getPath());
+ if (columnDecryptionSetup.isEncrypted()) {
+ bloomFilterDecryptor = columnDecryptionSetup.getMetaDataDecryptor();
+ bloomFilterHeaderAAD =
+ AesCipher.createModuleAAD(
+ fileDecryptor.getFileAAD(),
+ ModuleCipherFactory.ModuleType.BloomFilterHeader,
+ meta.getRowGroupOrdinal(),
+ columnDecryptionSetup.getOrdinal(),
+ -1);
+ bloomFilterBitsetAAD =
+ AesCipher.createModuleAAD(
+ fileDecryptor.getFileAAD(),
+ ModuleCipherFactory.ModuleType.BloomFilterBitset,
+ meta.getRowGroupOrdinal(),
+ columnDecryptionSetup.getOrdinal(),
+ -1);
+ }
+ }
+
+ // Read Bloom filter data header.
+ inputStream.seek(bloomFilterOffset);
+ BloomFilterHeader bloomFilterHeader;
+ try {
+ bloomFilterHeader =
+ Util.readBloomFilterHeader(inputStream, bloomFilterDecryptor, bloomFilterHeaderAAD);
+ } catch (IOException e) {
+ LOG.warn("read no bloom filter");
+ return null;
+ }
+
+ int numBytes = bloomFilterHeader.getNumBytes();
+ if (numBytes <= 0 || numBytes > BlockSplitBloomFilter.UPPER_BOUND_BYTES) {
+ LOG.warn("the read bloom filter size is wrong, size is {}", bloomFilterHeader.getNumBytes());
+ return null;
+ }
+
+ if (!bloomFilterHeader.getHash().isSetXXHASH()
+ || !bloomFilterHeader.getAlgorithm().isSetBLOCK()
+ || !bloomFilterHeader.getCompression().isSetUNCOMPRESSED()) {
+ LOG.warn(
+ "the read bloom filter is not supported yet, algorithm = {}, hash = {}, "
+ + "compression = {}",
+ bloomFilterHeader.getAlgorithm(),
+ bloomFilterHeader.getHash(),
+ bloomFilterHeader.getCompression());
+ return null;
+ }
+
+ byte[] bitset;
+ if (null == bloomFilterDecryptor) {
+ bitset = new byte[numBytes];
+ inputStream.readFully(bitset);
+ } else {
+ bitset = bloomFilterDecryptor.decrypt(inputStream, bloomFilterBitsetAAD);
+ if (bitset.length != numBytes) {
+ throw new ParquetCryptoRuntimeException("Wrong length of decrypted bloom filter bitset");
+ }
+ }
+ return new BlockSplitBloomFilter(bitset);
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/ColumnIndexReader.java b/common/src/main/java/org/apache/comet/parquet/ColumnIndexReader.java
new file mode 100644
index 000000000..4e4f6ba0d
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/ColumnIndexReader.java
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.parquet.crypto.AesCipher;
+import org.apache.parquet.crypto.InternalColumnDecryptionSetup;
+import org.apache.parquet.crypto.InternalFileDecryptor;
+import org.apache.parquet.crypto.ModuleCipherFactory;
+import org.apache.parquet.format.BlockCipher;
+import org.apache.parquet.format.Util;
+import org.apache.parquet.format.converter.ParquetMetadataConverter;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
+import org.apache.parquet.hadoop.metadata.ColumnPath;
+import org.apache.parquet.internal.column.columnindex.ColumnIndex;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+import org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore;
+import org.apache.parquet.internal.hadoop.metadata.IndexReference;
+import org.apache.parquet.io.SeekableInputStream;
+
+class ColumnIndexReader implements ColumnIndexStore {
+ private static final Logger LOG = LoggerFactory.getLogger(ColumnIndexReader.class);
+
+  // Used for columns that are not present in this Parquet file
+ private static final IndexStore MISSING_INDEX_STORE =
+ new IndexStore() {
+ @Override
+ public ColumnIndex getColumnIndex() {
+ return null;
+ }
+
+ @Override
+ public OffsetIndex getOffsetIndex() {
+ return null;
+ }
+ };
+
+ private static final ColumnIndexReader EMPTY =
+ new ColumnIndexReader(new BlockMetaData(), Collections.emptySet(), null, null) {
+ @Override
+ public ColumnIndex getColumnIndex(ColumnPath column) {
+ return null;
+ }
+
+ @Override
+ public OffsetIndex getOffsetIndex(ColumnPath column) {
+ throw new MissingOffsetIndexException(column);
+ }
+ };
+
+ private final InternalFileDecryptor fileDecryptor;
+ private final SeekableInputStream inputStream;
+  private final Map<ColumnPath, IndexStore> store;
+
+ /**
+ * Creates a column index store which lazily reads column/offset indexes for the columns in paths.
+ * Paths are the set of columns used for the projection.
+ */
+ static ColumnIndexReader create(
+ BlockMetaData block,
+      Set<ColumnPath> paths,
+ InternalFileDecryptor fileDecryptor,
+ SeekableInputStream inputStream) {
+ try {
+ return new ColumnIndexReader(block, paths, fileDecryptor, inputStream);
+ } catch (MissingOffsetIndexException e) {
+ return EMPTY;
+ }
+ }
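+  // Illustrative sketch (hypothetical caller): the reader acts as the ColumnIndexStore that
+  // parquet-mr's column-index filtering consults to compute which row ranges survive the
+  // pushed-down filter for a row group, roughly:
+  //
+  //   ColumnIndexStore store = ColumnIndexReader.create(block, paths, fileDecryptor, inputStream);
+  //   RowRanges ranges =
+  //       ColumnIndexFilter.calculateRowRanges(filter, store, paths, block.getRowCount());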
+
+ private ColumnIndexReader(
+ BlockMetaData block,
+      Set<ColumnPath> paths,
+ InternalFileDecryptor fileDecryptor,
+ SeekableInputStream inputStream) {
+ this.fileDecryptor = fileDecryptor;
+ this.inputStream = inputStream;
+    Map<ColumnPath, IndexStore> store = new HashMap<>();
+ for (ColumnChunkMetaData column : block.getColumns()) {
+ ColumnPath path = column.getPath();
+ if (paths.contains(path)) {
+ store.put(path, new IndexStoreImpl(column));
+ }
+ }
+ this.store = store;
+ }
+
+ @Override
+ public ColumnIndex getColumnIndex(ColumnPath column) {
+ return store.getOrDefault(column, MISSING_INDEX_STORE).getColumnIndex();
+ }
+
+ @Override
+ public OffsetIndex getOffsetIndex(ColumnPath column) {
+ return store.getOrDefault(column, MISSING_INDEX_STORE).getOffsetIndex();
+ }
+
+ private interface IndexStore {
+ ColumnIndex getColumnIndex();
+
+ OffsetIndex getOffsetIndex();
+ }
+
+ private class IndexStoreImpl implements IndexStore {
+ private final ColumnChunkMetaData meta;
+ private ColumnIndex columnIndex;
+ private boolean columnIndexRead;
+ private final OffsetIndex offsetIndex;
+
+ IndexStoreImpl(ColumnChunkMetaData meta) {
+ this.meta = meta;
+ OffsetIndex oi;
+ try {
+ oi = readOffsetIndex(meta);
+ } catch (IOException e) {
+ // If the I/O issue still stands it will fail the reading later;
+ // otherwise we fail the filtering only with a missing offset index.
+ LOG.warn("Unable to read offset index for column {}", meta.getPath(), e);
+ oi = null;
+ }
+ if (oi == null) {
+ throw new MissingOffsetIndexException(meta.getPath());
+ }
+ offsetIndex = oi;
+ }
+
+ @Override
+ public ColumnIndex getColumnIndex() {
+ if (!columnIndexRead) {
+ try {
+ columnIndex = readColumnIndex(meta);
+ } catch (IOException e) {
+ // If the I/O issue still stands it will fail the reading later;
+ // otherwise we fail the filtering only with a missing column index.
+ LOG.warn("Unable to read column index for column {}", meta.getPath(), e);
+ }
+ columnIndexRead = true;
+ }
+ return columnIndex;
+ }
+
+ @Override
+ public OffsetIndex getOffsetIndex() {
+ return offsetIndex;
+ }
+ }
+
+ // Visible for testing
+ ColumnIndex readColumnIndex(ColumnChunkMetaData column) throws IOException {
+ IndexReference ref = column.getColumnIndexReference();
+ if (ref == null) {
+ return null;
+ }
+ inputStream.seek(ref.getOffset());
+
+ BlockCipher.Decryptor columnIndexDecryptor = null;
+ byte[] columnIndexAAD = null;
+ if (null != fileDecryptor && !fileDecryptor.plaintextFile()) {
+ InternalColumnDecryptionSetup columnDecryptionSetup =
+ fileDecryptor.getColumnSetup(column.getPath());
+ if (columnDecryptionSetup.isEncrypted()) {
+ columnIndexDecryptor = columnDecryptionSetup.getMetaDataDecryptor();
+ columnIndexAAD =
+ AesCipher.createModuleAAD(
+ fileDecryptor.getFileAAD(),
+ ModuleCipherFactory.ModuleType.ColumnIndex,
+ column.getRowGroupOrdinal(),
+ columnDecryptionSetup.getOrdinal(),
+ -1);
+ }
+ }
+ return ParquetMetadataConverter.fromParquetColumnIndex(
+ column.getPrimitiveType(),
+ Util.readColumnIndex(inputStream, columnIndexDecryptor, columnIndexAAD));
+ }
+
+ // Visible for testing
+ OffsetIndex readOffsetIndex(ColumnChunkMetaData column) throws IOException {
+ IndexReference ref = column.getOffsetIndexReference();
+ if (ref == null) {
+ return null;
+ }
+ inputStream.seek(ref.getOffset());
+
+ BlockCipher.Decryptor offsetIndexDecryptor = null;
+ byte[] offsetIndexAAD = null;
+ if (null != fileDecryptor && !fileDecryptor.plaintextFile()) {
+ InternalColumnDecryptionSetup columnDecryptionSetup =
+ fileDecryptor.getColumnSetup(column.getPath());
+ if (columnDecryptionSetup.isEncrypted()) {
+ offsetIndexDecryptor = columnDecryptionSetup.getMetaDataDecryptor();
+ offsetIndexAAD =
+ AesCipher.createModuleAAD(
+ fileDecryptor.getFileAAD(),
+ ModuleCipherFactory.ModuleType.OffsetIndex,
+ column.getRowGroupOrdinal(),
+ columnDecryptionSetup.getOrdinal(),
+ -1);
+ }
+ }
+ return ParquetMetadataConverter.fromParquetOffsetIndex(
+ Util.readOffsetIndex(inputStream, offsetIndexDecryptor, offsetIndexAAD));
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/ColumnPageReader.java b/common/src/main/java/org/apache/comet/parquet/ColumnPageReader.java
new file mode 100644
index 000000000..744d12830
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/ColumnPageReader.java
@@ -0,0 +1,252 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.io.IOException;
+import java.util.ArrayDeque;
+import java.util.List;
+import java.util.Queue;
+
+import org.apache.parquet.bytes.BytesInput;
+import org.apache.parquet.column.page.DataPage;
+import org.apache.parquet.column.page.DataPageV1;
+import org.apache.parquet.column.page.DataPageV2;
+import org.apache.parquet.column.page.DictionaryPage;
+import org.apache.parquet.column.page.PageReader;
+import org.apache.parquet.compression.CompressionCodecFactory;
+import org.apache.parquet.crypto.AesCipher;
+import org.apache.parquet.crypto.ModuleCipherFactory;
+import org.apache.parquet.format.BlockCipher;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+import org.apache.parquet.io.ParquetDecodingException;
+
+public class ColumnPageReader implements PageReader {
+ private final CompressionCodecFactory.BytesInputDecompressor decompressor;
+ private final long valueCount;
+  private final Queue<DataPage> compressedPages;
+ private final DictionaryPage compressedDictionaryPage;
+
+ private final OffsetIndex offsetIndex;
+ private final long rowCount;
+ private int pageIndex = 0;
+
+ private final BlockCipher.Decryptor blockDecryptor;
+ private final byte[] dataPageAAD;
+ private final byte[] dictionaryPageAAD;
+
+ ColumnPageReader(
+ CompressionCodecFactory.BytesInputDecompressor decompressor,
+      List<DataPage> compressedPages,
+ DictionaryPage compressedDictionaryPage,
+ OffsetIndex offsetIndex,
+ long rowCount,
+ BlockCipher.Decryptor blockDecryptor,
+ byte[] fileAAD,
+ int rowGroupOrdinal,
+ int columnOrdinal) {
+ this.decompressor = decompressor;
+ this.compressedPages = new ArrayDeque<>(compressedPages);
+ this.compressedDictionaryPage = compressedDictionaryPage;
+ long count = 0;
+ for (DataPage p : compressedPages) {
+ count += p.getValueCount();
+ }
+ this.valueCount = count;
+ this.offsetIndex = offsetIndex;
+ this.rowCount = rowCount;
+ this.blockDecryptor = blockDecryptor;
+
+ if (blockDecryptor != null) {
+ dataPageAAD =
+ AesCipher.createModuleAAD(
+ fileAAD, ModuleCipherFactory.ModuleType.DataPage, rowGroupOrdinal, columnOrdinal, 0);
+ dictionaryPageAAD =
+ AesCipher.createModuleAAD(
+ fileAAD,
+ ModuleCipherFactory.ModuleType.DictionaryPage,
+ rowGroupOrdinal,
+ columnOrdinal,
+ -1);
+ } else {
+ dataPageAAD = null;
+ dictionaryPageAAD = null;
+ }
+ }
+
+ @Override
+ public long getTotalValueCount() {
+ return valueCount;
+ }
+
+ /** Returns the total value count of the current page. */
+ public int getPageValueCount() {
+ return compressedPages.element().getValueCount();
+ }
+
+ /** Skips the current page so it won't be returned by {@link #readPage()} */
+ public void skipPage() {
+ compressedPages.poll();
+ pageIndex++;
+ }
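+  // Illustrative sketch (hypothetical caller; `pageCount` and `pageIsFilteredOut` are invented
+  // names): with an offset index, pages whose row ranges were filtered out can be dropped
+  // without being decrypted or decompressed.
+  //
+  //   for (int i = 0; i < pageCount; i++) {
+  //     if (pageIsFilteredOut(i)) {
+  //       pageReader.skipPage();                 // discard the compressed page
+  //     } else {
+  //       DataPage page = pageReader.readPage(); // decrypt + decompress this page
+  //     }
+  //   }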
+
+ @Override
+ public DataPage readPage() {
+ final DataPage compressedPage = compressedPages.poll();
+ if (compressedPage == null) {
+ return null;
+ }
+ final int currentPageIndex = pageIndex++;
+
+ if (null != blockDecryptor) {
+ AesCipher.quickUpdatePageAAD(dataPageAAD, getPageOrdinal(currentPageIndex));
+ }
+
+ return compressedPage.accept(
+        new DataPage.Visitor<DataPage>() {
+ @Override
+ public DataPage visit(DataPageV1 dataPageV1) {
+ try {
+ BytesInput bytes = dataPageV1.getBytes();
+ if (null != blockDecryptor) {
+ bytes = BytesInput.from(blockDecryptor.decrypt(bytes.toByteArray(), dataPageAAD));
+ }
+ BytesInput decompressed =
+ decompressor.decompress(bytes, dataPageV1.getUncompressedSize());
+
+ final DataPageV1 decompressedPage;
+ if (offsetIndex == null) {
+ decompressedPage =
+ new DataPageV1(
+ decompressed,
+ dataPageV1.getValueCount(),
+ dataPageV1.getUncompressedSize(),
+ dataPageV1.getStatistics(),
+ dataPageV1.getRlEncoding(),
+ dataPageV1.getDlEncoding(),
+ dataPageV1.getValueEncoding());
+ } else {
+ long firstRowIndex = offsetIndex.getFirstRowIndex(currentPageIndex);
+ decompressedPage =
+ new DataPageV1(
+ decompressed,
+ dataPageV1.getValueCount(),
+ dataPageV1.getUncompressedSize(),
+ firstRowIndex,
+ Math.toIntExact(
+ offsetIndex.getLastRowIndex(currentPageIndex, rowCount)
+ - firstRowIndex
+ + 1),
+ dataPageV1.getStatistics(),
+ dataPageV1.getRlEncoding(),
+ dataPageV1.getDlEncoding(),
+ dataPageV1.getValueEncoding());
+ }
+ if (dataPageV1.getCrc().isPresent()) {
+ decompressedPage.setCrc(dataPageV1.getCrc().getAsInt());
+ }
+ return decompressedPage;
+ } catch (IOException e) {
+ throw new ParquetDecodingException("could not decompress page", e);
+ }
+ }
+
+ @Override
+ public DataPage visit(DataPageV2 dataPageV2) {
+ if (!dataPageV2.isCompressed() && offsetIndex == null && null == blockDecryptor) {
+ return dataPageV2;
+ }
+ BytesInput pageBytes = dataPageV2.getData();
+
+ if (null != blockDecryptor) {
+ try {
+ pageBytes =
+ BytesInput.from(blockDecryptor.decrypt(pageBytes.toByteArray(), dataPageAAD));
+ } catch (IOException e) {
+ throw new ParquetDecodingException(
+ "could not convert page ByteInput to byte array", e);
+ }
+ }
+ if (dataPageV2.isCompressed()) {
+ int uncompressedSize =
+ Math.toIntExact(
+ dataPageV2.getUncompressedSize()
+ - dataPageV2.getDefinitionLevels().size()
+ - dataPageV2.getRepetitionLevels().size());
+ try {
+ pageBytes = decompressor.decompress(pageBytes, uncompressedSize);
+ } catch (IOException e) {
+ throw new ParquetDecodingException("could not decompress page", e);
+ }
+ }
+
+ if (offsetIndex == null) {
+ return DataPageV2.uncompressed(
+ dataPageV2.getRowCount(),
+ dataPageV2.getNullCount(),
+ dataPageV2.getValueCount(),
+ dataPageV2.getRepetitionLevels(),
+ dataPageV2.getDefinitionLevels(),
+ dataPageV2.getDataEncoding(),
+ pageBytes,
+ dataPageV2.getStatistics());
+ } else {
+ return DataPageV2.uncompressed(
+ dataPageV2.getRowCount(),
+ dataPageV2.getNullCount(),
+ dataPageV2.getValueCount(),
+ offsetIndex.getFirstRowIndex(currentPageIndex),
+ dataPageV2.getRepetitionLevels(),
+ dataPageV2.getDefinitionLevels(),
+ dataPageV2.getDataEncoding(),
+ pageBytes,
+ dataPageV2.getStatistics());
+ }
+ }
+ });
+ }
+
+ @Override
+ public DictionaryPage readDictionaryPage() {
+ if (compressedDictionaryPage == null) {
+ return null;
+ }
+ try {
+ BytesInput bytes = compressedDictionaryPage.getBytes();
+ if (null != blockDecryptor) {
+ bytes = BytesInput.from(blockDecryptor.decrypt(bytes.toByteArray(), dictionaryPageAAD));
+ }
+ DictionaryPage decompressedPage =
+ new DictionaryPage(
+ decompressor.decompress(bytes, compressedDictionaryPage.getUncompressedSize()),
+ compressedDictionaryPage.getDictionarySize(),
+ compressedDictionaryPage.getEncoding());
+ if (compressedDictionaryPage.getCrc().isPresent()) {
+ decompressedPage.setCrc(compressedDictionaryPage.getCrc().getAsInt());
+ }
+ return decompressedPage;
+ } catch (IOException e) {
+ throw new ParquetDecodingException("Could not decompress dictionary page", e);
+ }
+ }
+
+ private int getPageOrdinal(int currentPageIndex) {
+ return offsetIndex == null ? currentPageIndex : offsetIndex.getPageOrdinal(currentPageIndex);
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/ColumnReader.java b/common/src/main/java/org/apache/comet/parquet/ColumnReader.java
new file mode 100644
index 000000000..7e45f4f9a
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/ColumnReader.java
@@ -0,0 +1,314 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.arrow.c.ArrowArray;
+import org.apache.arrow.c.ArrowSchema;
+import org.apache.arrow.c.CDataDictionaryProvider;
+import org.apache.arrow.c.Data;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.Encoding;
+import org.apache.parquet.column.page.DataPage;
+import org.apache.parquet.column.page.DataPageV1;
+import org.apache.parquet.column.page.DataPageV2;
+import org.apache.parquet.column.page.DictionaryPage;
+import org.apache.parquet.column.page.PageReader;
+import org.apache.spark.sql.types.DataType;
+
+import org.apache.comet.CometConf;
+import org.apache.comet.vector.CometDecodedVector;
+import org.apache.comet.vector.CometDictionary;
+import org.apache.comet.vector.CometDictionaryVector;
+import org.apache.comet.vector.CometPlainVector;
+import org.apache.comet.vector.CometVector;
+
+public class ColumnReader extends AbstractColumnReader {
+ protected static final Logger LOG = LoggerFactory.getLogger(ColumnReader.class);
+ protected static final BufferAllocator ALLOCATOR = new RootAllocator();
+
+ /**
+ * The current Comet vector holding all the values read by this column reader. Owned by this
+ * reader and MUST be closed after use.
+ */
+ private CometDecodedVector currentVector;
+
+ /** Dictionary values for this column. Only set if the column is using dictionary encoding. */
+ protected CometDictionary dictionary;
+
+ /** Reader for dictionary & data pages in the current column chunk. */
+ protected PageReader pageReader;
+
+ /** Whether the first data page has been loaded. */
+ private boolean firstPageLoaded = false;
+
+ /**
+   * The number of nulls in the current batch, used when we are skipping the import of Arrow
+   * vectors, in which case we'll simply update the null count of the existing vectors.
+ */
+ int currentNumNulls;
+
+ /**
+   * The number of values in the current batch, used when we are skipping the import of Arrow
+   * vectors, in which case we'll simply update the value count of the existing vectors.
+ */
+ int currentNumValues;
+
+ /**
+   * Whether the last loaded vector contains any null value. This is used to determine if we can
+   * skip vector reloading. If the flag is false, the Arrow C data API skipped importing the
+   * validity buffer, and therefore we cannot skip vector reloading.
+ */
+ boolean hadNull;
+
+ /** Dictionary provider for this column. */
+ private final CDataDictionaryProvider dictionaryProvider = new CDataDictionaryProvider();
+
+ public ColumnReader(
+ DataType type,
+ ColumnDescriptor descriptor,
+ int batchSize,
+ boolean useDecimal128,
+ boolean useLegacyDateTimestamp) {
+ super(type, descriptor, useDecimal128, useLegacyDateTimestamp);
+ assert batchSize > 0 : "Batch size must be positive, found " + batchSize;
+ this.batchSize = batchSize;
+ initNative();
+ }
+
+ /**
+   * Sets the page reader for a new column chunk to read. Callers are expected to call
+   * `readBatch` after this.
+ *
+ * @param pageReader the page reader for the new column chunk
+ */
+ public void setPageReader(PageReader pageReader) throws IOException {
+ this.pageReader = pageReader;
+
+ DictionaryPage dictionaryPage = pageReader.readDictionaryPage();
+ if (dictionaryPage != null) {
+ LOG.debug("dictionary page encoding = {}", dictionaryPage.getEncoding());
+ Native.setDictionaryPage(
+ nativeHandle,
+ dictionaryPage.getDictionarySize(),
+ dictionaryPage.getBytes().toByteArray(),
+ dictionaryPage.getEncoding().ordinal());
+ }
+ }
+
+ @Override
+ public void readBatch(int total) {
+ LOG.debug("Start to batch of size = " + total);
+
+ if (!firstPageLoaded) {
+ readPage();
+ firstPageLoaded = true;
+ }
+
+ // Now first reset the current columnar batch so that it can be used to fill in a new batch
+    // of values. Then keep decoding values (via 'Native.readBatch'), loading more data pages
+    // (via 'readPage') as needed, until the current batch is full or we have read 'total' values.
+ Native.resetBatch(nativeHandle);
+
+ int left = total, nullsRead = 0;
+ while (left > 0) {
+ int[] array = Native.readBatch(nativeHandle, left);
+ int valuesRead = array[0];
+ nullsRead += array[1];
+ if (valuesRead < left) {
+ readPage();
+ }
+ left -= valuesRead;
+ }
+
+ this.currentNumValues = total;
+ this.currentNumNulls = nullsRead;
+ }
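+  // Illustrative per-row-group flow (hypothetical caller), tying the methods above together:
+  //
+  //   reader.setPageReader(rowGroup.getPageReader(descriptor)); // loads the dictionary page, if any
+  //   reader.readBatch(batchSize);                              // decodes values on the native side
+  //   CometVector vector = reader.currentBatch();               // imports the result as an Arrow vector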
+
+ /** Returns the {@link CometVector} read by this reader. */
+ @Override
+ public CometVector currentBatch() {
+ return loadVector();
+ }
+
+ @Override
+ public void close() {
+ if (currentVector != null) {
+ currentVector.close();
+ currentVector = null;
+ }
+ dictionaryProvider.close();
+ super.close();
+ }
+
+ /** Returns a decoded {@link CometDecodedVector Comet vector}. */
+ public CometDecodedVector loadVector() {
+ // Only re-use Comet vector iff:
+ // 1. if we're not using dictionary encoding, since with dictionary encoding, the native
+ // side may fallback to plain encoding and the underlying memory address for the vector
+ // will change as result.
+ // 2. if the column type is of fixed width, in other words, string/binary are not supported
+ // since the native side may resize the vector and therefore change memory address.
+ // 3. if the last loaded vector contains null values: if values of last vector are all not
+ // null, Arrow C data API will skip loading the native validity buffer, therefore we
+ // should not re-use the vector in that case.
+    //    4. if the last loaded vector doesn't contain any null value and the current batch also
+    //       has no nulls, in which case we can re-use the loaded vector.
+    //    5. if the new number of values is the same or smaller.
+ if ((hadNull || currentNumNulls == 0)
+ && currentVector != null
+ && dictionary == null
+ && currentVector.isFixedLength()
+ && currentVector.numValues() >= currentNumValues) {
+ currentVector.setNumNulls(currentNumNulls);
+ currentVector.setNumValues(currentNumValues);
+ return currentVector;
+ }
+
+ LOG.debug("Reloading vector");
+
+ // Close the previous vector first to release struct memory allocated to import Arrow array &
+ // schema from native side, through the C data interface
+ if (currentVector != null) {
+ currentVector.close();
+ }
+
+ long[] addresses = Native.currentBatch(nativeHandle);
+
+ try (ArrowArray array = ArrowArray.wrap(addresses[0]);
+ ArrowSchema schema = ArrowSchema.wrap(addresses[1])) {
+ FieldVector vector = Data.importVector(ALLOCATOR, array, schema, dictionaryProvider);
+ DictionaryEncoding dictionaryEncoding = vector.getField().getDictionary();
+
+ CometPlainVector cometVector = new CometPlainVector(vector, useDecimal128);
+
+ // Update whether the current vector contains any null values. This is used in the following
+ // batch(s) to determine whether we can skip loading the native vector.
+ hadNull = cometVector.hasNull();
+
+ if (dictionaryEncoding == null) {
+ if (dictionary != null) {
+        // This means the column was using dictionary encoding but has now fallen back to plain
+        // encoding on the native side. Set 'dictionary' to null here so we can use it as a
+        // condition to check whether we can re-use the vector later.
+ dictionary = null;
+ }
+ // Either the column is not dictionary encoded, or it was using dictionary encoding but
+ // a new data page has switched back to use plain encoding. For both cases we should
+ // return plain vector.
+ currentVector = cometVector;
+ return currentVector;
+ } else if (dictionary == null) {
+ // There is dictionary from native side but the Java side dictionary hasn't been
+ // initialized yet.
+ Dictionary arrowDictionary = dictionaryProvider.lookup(dictionaryEncoding.getId());
+ CometPlainVector dictionaryVector =
+ new CometPlainVector(arrowDictionary.getVector(), useDecimal128);
+ dictionary = new CometDictionary(dictionaryVector);
+ }
+
+ currentVector =
+ new CometDictionaryVector(cometVector, dictionary, dictionaryProvider, useDecimal128);
+ return currentVector;
+ }
+ }
+
+ protected void readPage() {
+ DataPage page = pageReader.readPage();
+ if (page == null) {
+ throw new RuntimeException("overreading: returned DataPage is null");
+ }
+ int pageValueCount = page.getValueCount();
+ page.accept(
+        new DataPage.Visitor<Void>() {
+ @Override
+ public Void visit(DataPageV1 dataPageV1) {
+ LOG.debug("data page encoding = {}", dataPageV1.getValueEncoding());
+ if (dataPageV1.getDlEncoding() != Encoding.RLE
+ && descriptor.getMaxDefinitionLevel() != 0) {
+ throw new UnsupportedOperationException(
+ "Unsupported encoding: " + dataPageV1.getDlEncoding());
+ }
+ if (!isValidValueEncoding(dataPageV1.getValueEncoding())) {
+ throw new UnsupportedOperationException(
+ "Unsupported value encoding: " + dataPageV1.getValueEncoding());
+ }
+ try {
+ boolean useDirectBuffer =
+ (Boolean) CometConf.COMET_PARQUET_ENABLE_DIRECT_BUFFER().get();
+ if (useDirectBuffer) {
+ ByteBuffer buffer = dataPageV1.getBytes().toByteBuffer();
+ Native.setPageBufferV1(
+ nativeHandle, pageValueCount, buffer, dataPageV1.getValueEncoding().ordinal());
+ } else {
+ byte[] array = dataPageV1.getBytes().toByteArray();
+ Native.setPageV1(
+ nativeHandle, pageValueCount, array, dataPageV1.getValueEncoding().ordinal());
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(DataPageV2 dataPageV2) {
+ if (!isValidValueEncoding(dataPageV2.getDataEncoding())) {
+ throw new UnsupportedOperationException(
+ "Unsupported encoding: " + dataPageV2.getDataEncoding());
+ }
+ try {
+ Native.setPageV2(
+ nativeHandle,
+ pageValueCount,
+ dataPageV2.getDefinitionLevels().toByteArray(),
+ dataPageV2.getRepetitionLevels().toByteArray(),
+ dataPageV2.getData().toByteArray(),
+ dataPageV2.getDataEncoding().ordinal());
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ return null;
+ }
+ });
+ }
+
+ @SuppressWarnings("deprecation")
+ private boolean isValidValueEncoding(Encoding encoding) {
+ switch (encoding) {
+ case PLAIN:
+ case RLE_DICTIONARY:
+ case PLAIN_DICTIONARY:
+ return true;
+ default:
+ return false;
+ }
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/CometInputFile.java b/common/src/main/java/org/apache/comet/parquet/CometInputFile.java
new file mode 100644
index 000000000..eb54d1a72
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/CometInputFile.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.io.IOException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FutureDataInputStreamBuilder;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.VersionInfo;
+import org.apache.parquet.hadoop.util.HadoopStreams;
+import org.apache.parquet.io.InputFile;
+import org.apache.parquet.io.SeekableInputStream;
+
+/**
+ * A Parquet {@link InputFile} implementation that's similar to {@link
+ * org.apache.parquet.hadoop.util.HadoopInputFile}, but with optimizations introduced in Hadoop 3.x,
+ * for S3 specifically.
+ */
+public class CometInputFile implements InputFile {
+ private static final String MAJOR_MINOR_REGEX = "^(\\d+)\\.(\\d+)(\\..*)?$";
+ private static final Pattern VERSION_MATCHER = Pattern.compile(MAJOR_MINOR_REGEX);
+
+ private final FileSystem fs;
+ private final FileStatus stat;
+ private final Configuration conf;
+
+ public static CometInputFile fromPath(Path path, Configuration conf) throws IOException {
+ FileSystem fs = path.getFileSystem(conf);
+ return new CometInputFile(fs, fs.getFileStatus(path), conf);
+ }
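+  // Illustrative usage (hypothetical path and configuration):
+  //
+  //   Configuration conf = new Configuration();
+  //   CometInputFile file = CometInputFile.fromPath(new Path("s3a://bucket/data.parquet"), conf);
+  //   try (SeekableInputStream in = file.newStream()) {
+  //     // read the Parquet footer / row groups from `in`
+  //   }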
+
+ private CometInputFile(FileSystem fs, FileStatus stat, Configuration conf) {
+ this.fs = fs;
+ this.stat = stat;
+ this.conf = conf;
+ }
+
+ @Override
+ public long getLength() {
+ return stat.getLen();
+ }
+
+ public Configuration getConf() {
+ return this.conf;
+ }
+
+ public FileSystem getFileSystem() {
+ return this.fs;
+ }
+
+ public Path getPath() {
+ return stat.getPath();
+ }
+
+ @Override
+ public SeekableInputStream newStream() throws IOException {
+ FSDataInputStream stream;
+ try {
+ if (isAtLeastHadoop33()) {
+ // If Hadoop version is >= 3.3.x, we'll use the 'openFile' API which can save a
+        // HEAD request against cloud storage such as S3.
+ FutureDataInputStreamBuilder inputStreamBuilder =
+ fs.openFile(stat.getPath()).withFileStatus(stat);
+
+ if (stat.getPath().toString().startsWith("s3a")) {
+ // Switch to random S3 input policy so that we don't do sequential read on the entire
+          // S3 object. By default, the policy is 'normal', which reads sequentially until a
+          // backward seek happens; in our case that will never happen.
+ inputStreamBuilder =
+ inputStreamBuilder.opt("fs.s3a.experimental.input.fadvise", "random");
+ }
+ stream = inputStreamBuilder.build().get();
+ } else {
+ stream = fs.open(stat.getPath());
+ }
+ } catch (Exception e) {
+ throw new IOException("Error when opening file " + stat.getPath(), e);
+ }
+ return HadoopStreams.wrap(stream);
+ }
+
+ public SeekableInputStream newStream(long offset, long length) throws IOException {
+ try {
+ FSDataInputStream stream;
+ if (isAtLeastHadoop33()) {
+ FutureDataInputStreamBuilder inputStreamBuilder =
+ fs.openFile(stat.getPath()).withFileStatus(stat);
+
+ if (stat.getPath().toString().startsWith("s3a")) {
+ // Switch to random S3 input policy so that we don't do sequential read on the entire
+          // S3 object. By default, the policy is 'normal', which reads sequentially until a
+          // backward seek happens; in our case that will never happen.
+ //
+ // Also set read ahead length equal to the column chunk length so we don't have to open
+ // multiple S3 http connections.
+ inputStreamBuilder =
+ inputStreamBuilder
+ .opt("fs.s3a.experimental.input.fadvise", "random")
+ .opt("fs.s3a.readahead.range", Long.toString(length));
+ }
+
+ stream = inputStreamBuilder.build().get();
+ } else {
+ stream = fs.open(stat.getPath());
+ }
+ return HadoopStreams.wrap(stream);
+ } catch (Exception e) {
+ throw new IOException(
+ "Error when opening file " + stat.getPath() + ", offset=" + offset + ", length=" + length,
+ e);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return stat.getPath().toString();
+ }
+
+ private static boolean isAtLeastHadoop33() {
+ String version = VersionInfo.getVersion();
+ return CometInputFile.isAtLeastHadoop33(version);
+ }
+
+ static boolean isAtLeastHadoop33(String version) {
+ Matcher matcher = VERSION_MATCHER.matcher(version);
+ if (matcher.matches()) {
+ if (matcher.group(1).equals("3")) {
+ int minorVersion = Integer.parseInt(matcher.group(2));
+ return minorVersion >= 3;
+ }
+ }
+ return false;
+ }
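+  // For example, given the regex above: "3.3.6" and "3.4.0" return true, while "3.2.4",
+  // "2.10.2", and unparsable version strings return false.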
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/ConstantColumnReader.java b/common/src/main/java/org/apache/comet/parquet/ConstantColumnReader.java
new file mode 100644
index 000000000..8de2376f9
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/ConstantColumnReader.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.math.BigInteger;
+
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.types.*;
+import org.apache.spark.unsafe.types.UTF8String;
+
+import org.apache.comet.shims.ShimResolveDefaultColumns;
+
+/**
+ * A column reader that always returns constant vectors. Used, for instance, for reading
+ * partition columns.
+ */
+public class ConstantColumnReader extends MetadataColumnReader {
+  /** Whether all the values in this constant column are null. */
+ private boolean isNull;
+
+  /** The constant value, as an Object, used to initialize this column reader. */
+ private Object value;
+
+ public ConstantColumnReader(StructField field, int batchSize, boolean useDecimal128) {
+ this(field.dataType(), TypeUtil.convertToParquet(field), batchSize, useDecimal128);
+ this.value = ShimResolveDefaultColumns.getExistenceDefaultValue(field);
+ init(value);
+ }
+
+ public ConstantColumnReader(
+ StructField field, int batchSize, InternalRow values, int index, boolean useDecimal128) {
+ this(field.dataType(), TypeUtil.convertToParquet(field), batchSize, useDecimal128);
+ init(values, index);
+ }
+
+ public ConstantColumnReader(
+ DataType type, ColumnDescriptor descriptor, Object value, boolean useDecimal128) {
+ super(type, descriptor, useDecimal128);
+ this.value = value;
+ }
+
+ ConstantColumnReader(
+ DataType type, ColumnDescriptor descriptor, int batchSize, boolean useDecimal128) {
+ super(type, descriptor, useDecimal128);
+ this.batchSize = batchSize;
+ initNative();
+ }
+
+ @Override
+ public void setBatchSize(int batchSize) {
+ super.setBatchSize(batchSize);
+ init(value);
+ }
+
+ @Override
+ public void readBatch(int total) {
+ super.readBatch(total);
+ if (isNull) setNumNulls(total);
+ }
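+  // Illustrative usage (hypothetical): a partition column whose value is constant for the whole
+  // file can be served by this reader without touching the Parquet data pages.
+  //
+  //   ConstantColumnReader reader =
+  //       new ConstantColumnReader(field, capacity, partitionValues, partitionIndex, useDecimal128);
+  //   reader.readBatch(numRows);
+  //   CometVector vector = reader.currentBatch(); // every row holds the same constant value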
+
+ private void init(InternalRow values, int index) {
+ Object value = values.get(index, type);
+ init(value);
+ }
+
+ private void init(Object value) {
+ if (value == null) {
+ Native.setNull(nativeHandle);
+ isNull = true;
+ } else if (type == DataTypes.BooleanType) {
+ Native.setBoolean(nativeHandle, (boolean) value);
+ } else if (type == DataTypes.ByteType) {
+ Native.setByte(nativeHandle, (byte) value);
+ } else if (type == DataTypes.ShortType) {
+ Native.setShort(nativeHandle, (short) value);
+ } else if (type == DataTypes.IntegerType) {
+ Native.setInt(nativeHandle, (int) value);
+ } else if (type == DataTypes.LongType) {
+ Native.setLong(nativeHandle, (long) value);
+ } else if (type == DataTypes.FloatType) {
+ Native.setFloat(nativeHandle, (float) value);
+ } else if (type == DataTypes.DoubleType) {
+ Native.setDouble(nativeHandle, (double) value);
+ } else if (type == DataTypes.BinaryType) {
+ Native.setBinary(nativeHandle, (byte[]) value);
+ } else if (type == DataTypes.StringType) {
+ Native.setBinary(nativeHandle, ((UTF8String) value).getBytes());
+ } else if (type == DataTypes.DateType) {
+ Native.setInt(nativeHandle, (int) value);
+ } else if (type == DataTypes.TimestampType || type == TimestampNTZType$.MODULE$) {
+ Native.setLong(nativeHandle, (long) value);
+ } else if (type instanceof DecimalType) {
+ DecimalType dt = (DecimalType) type;
+ Decimal d = (Decimal) value;
+ if (!useDecimal128 && dt.precision() <= Decimal.MAX_INT_DIGITS()) {
+ Native.setInt(nativeHandle, ((int) d.toUnscaledLong()));
+ } else if (!useDecimal128 && dt.precision() <= Decimal.MAX_LONG_DIGITS()) {
+ Native.setLong(nativeHandle, d.toUnscaledLong());
+ } else {
+ final BigInteger integer = d.toJavaBigDecimal().unscaledValue();
+ byte[] bytes = integer.toByteArray();
+ Native.setDecimal(nativeHandle, bytes);
+ }
+ } else {
+ throw new UnsupportedOperationException("Unsupported Spark type: " + type);
+ }
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/DictionaryPageReader.java b/common/src/main/java/org/apache/comet/parquet/DictionaryPageReader.java
new file mode 100644
index 000000000..b4b5a8fc9
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/DictionaryPageReader.java
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.parquet.ParquetReadOptions;
+import org.apache.parquet.bytes.BytesInput;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.page.DictionaryPage;
+import org.apache.parquet.column.page.DictionaryPageReadStore;
+import org.apache.parquet.compression.CompressionCodecFactory;
+import org.apache.parquet.crypto.AesCipher;
+import org.apache.parquet.crypto.InternalColumnDecryptionSetup;
+import org.apache.parquet.crypto.InternalFileDecryptor;
+import org.apache.parquet.crypto.ModuleCipherFactory;
+import org.apache.parquet.format.BlockCipher;
+import org.apache.parquet.format.DictionaryPageHeader;
+import org.apache.parquet.format.PageHeader;
+import org.apache.parquet.format.Util;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
+import org.apache.parquet.io.ParquetDecodingException;
+import org.apache.parquet.io.SeekableInputStream;
+
+public class DictionaryPageReader implements DictionaryPageReadStore {
+  private final Map<String, Optional<DictionaryPage>> cache;
+ private final InternalFileDecryptor fileDecryptor;
+ private final SeekableInputStream inputStream;
+ private final ParquetReadOptions options;
+  private final Map<String, ColumnChunkMetaData> columns;
+
+ DictionaryPageReader(
+ BlockMetaData block,
+ InternalFileDecryptor fileDecryptor,
+ SeekableInputStream inputStream,
+ ParquetReadOptions options) {
+ this.columns = new HashMap<>();
+ this.cache = new ConcurrentHashMap<>();
+ this.fileDecryptor = fileDecryptor;
+ this.inputStream = inputStream;
+ this.options = options;
+
+ for (ColumnChunkMetaData column : block.getColumns()) {
+ columns.put(column.getPath().toDotString(), column);
+ }
+ }
+
+ @Override
+ public DictionaryPage readDictionaryPage(ColumnDescriptor descriptor) {
+ String dotPath = String.join(".", descriptor.getPath());
+ ColumnChunkMetaData column = columns.get(dotPath);
+
+ if (column == null) {
+ throw new ParquetDecodingException("Failed to load dictionary, unknown column: " + dotPath);
+ }
+
+ return cache
+ .computeIfAbsent(
+ dotPath,
+ key -> {
+ try {
+ final DictionaryPage dict =
+ column.hasDictionaryPage() ? readDictionary(column) : null;
+
+ // Copy the dictionary to ensure it can be reused if it is returned
+ // more than once. This can happen when a DictionaryFilter has two or
+ // more predicates for the same column. Misses are cached as well.
+ return (dict != null) ? Optional.of(reusableCopy(dict)) : Optional.empty();
+ } catch (IOException e) {
+ throw new ParquetDecodingException("Failed to read dictionary", e);
+ }
+ })
+ .orElse(null);
+ }
+
+ DictionaryPage readDictionary(ColumnChunkMetaData meta) throws IOException {
+ if (!meta.hasDictionaryPage()) {
+ return null;
+ }
+
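+ // The dictionary page, when present, is the first page of the column chunk, so seek to the
+ // chunk's starting position before reading the page header.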
+ if (inputStream.getPos() != meta.getStartingPos()) {
+ inputStream.seek(meta.getStartingPos());
+ }
+
+ boolean encryptedColumn = false;
+ InternalColumnDecryptionSetup columnDecryptionSetup = null;
+ byte[] dictionaryPageAAD = null;
+ BlockCipher.Decryptor pageDecryptor = null;
+ if (null != fileDecryptor && !fileDecryptor.plaintextFile()) {
+ columnDecryptionSetup = fileDecryptor.getColumnSetup(meta.getPath());
+ if (columnDecryptionSetup.isEncrypted()) {
+ encryptedColumn = true;
+ }
+ }
+
+ PageHeader pageHeader;
+ if (!encryptedColumn) {
+ pageHeader = Util.readPageHeader(inputStream);
+ } else {
+ byte[] dictionaryPageHeaderAAD =
+ AesCipher.createModuleAAD(
+ fileDecryptor.getFileAAD(),
+ ModuleCipherFactory.ModuleType.DictionaryPageHeader,
+ meta.getRowGroupOrdinal(),
+ columnDecryptionSetup.getOrdinal(),
+ -1);
+ pageHeader =
+ Util.readPageHeader(
+ inputStream, columnDecryptionSetup.getMetaDataDecryptor(), dictionaryPageHeaderAAD);
+ dictionaryPageAAD =
+ AesCipher.createModuleAAD(
+ fileDecryptor.getFileAAD(),
+ ModuleCipherFactory.ModuleType.DictionaryPage,
+ meta.getRowGroupOrdinal(),
+ columnDecryptionSetup.getOrdinal(),
+ -1);
+ pageDecryptor = columnDecryptionSetup.getDataDecryptor();
+ }
+
+ if (!pageHeader.isSetDictionary_page_header()) {
+ return null;
+ }
+
+ DictionaryPage compressedPage =
+ readCompressedDictionary(pageHeader, inputStream, pageDecryptor, dictionaryPageAAD);
+ CompressionCodecFactory.BytesInputDecompressor decompressor =
+ options.getCodecFactory().getDecompressor(meta.getCodec());
+
+ return new DictionaryPage(
+ decompressor.decompress(compressedPage.getBytes(), compressedPage.getUncompressedSize()),
+ compressedPage.getDictionarySize(),
+ compressedPage.getEncoding());
+ }
+
+ private DictionaryPage readCompressedDictionary(
+ PageHeader pageHeader,
+ SeekableInputStream fin,
+ BlockCipher.Decryptor pageDecryptor,
+ byte[] dictionaryPageAAD)
+ throws IOException {
+ DictionaryPageHeader dictHeader = pageHeader.getDictionary_page_header();
+
+ int uncompressedPageSize = pageHeader.getUncompressed_page_size();
+ int compressedPageSize = pageHeader.getCompressed_page_size();
+
+ byte[] dictPageBytes = new byte[compressedPageSize];
+ fin.readFully(dictPageBytes);
+
+ BytesInput bin = BytesInput.from(dictPageBytes);
+
+ if (null != pageDecryptor) {
+ bin = BytesInput.from(pageDecryptor.decrypt(bin.toByteArray(), dictionaryPageAAD));
+ }
+
+ return new DictionaryPage(
+ bin,
+ uncompressedPageSize,
+ dictHeader.getNum_values(),
+ org.apache.parquet.column.Encoding.valueOf(dictHeader.getEncoding().name()));
+ }
+
+ private static DictionaryPage reusableCopy(DictionaryPage dict) throws IOException {
+ return new DictionaryPage(
+ BytesInput.from(dict.getBytes().toByteArray()),
+ dict.getDictionarySize(),
+ dict.getEncoding());
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/FileReader.java b/common/src/main/java/org/apache/comet/parquet/FileReader.java
new file mode 100644
index 000000000..eddaf3f1a
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/FileReader.java
@@ -0,0 +1,1151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.reflect.Method;
+import java.net.URI;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import java.util.zip.CRC32;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.parquet.ParquetReadOptions;
+import org.apache.parquet.Preconditions;
+import org.apache.parquet.bytes.ByteBufferInputStream;
+import org.apache.parquet.bytes.BytesInput;
+import org.apache.parquet.bytes.BytesUtils;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.page.DataPage;
+import org.apache.parquet.column.page.DataPageV1;
+import org.apache.parquet.column.page.DataPageV2;
+import org.apache.parquet.column.page.DictionaryPage;
+import org.apache.parquet.column.page.PageReadStore;
+import org.apache.parquet.compression.CompressionCodecFactory;
+import org.apache.parquet.crypto.AesCipher;
+import org.apache.parquet.crypto.FileDecryptionProperties;
+import org.apache.parquet.crypto.InternalColumnDecryptionSetup;
+import org.apache.parquet.crypto.InternalFileDecryptor;
+import org.apache.parquet.crypto.ModuleCipherFactory;
+import org.apache.parquet.crypto.ParquetCryptoRuntimeException;
+import org.apache.parquet.filter2.compat.FilterCompat;
+import org.apache.parquet.format.BlockCipher;
+import org.apache.parquet.format.DataPageHeader;
+import org.apache.parquet.format.DataPageHeaderV2;
+import org.apache.parquet.format.DictionaryPageHeader;
+import org.apache.parquet.format.FileCryptoMetaData;
+import org.apache.parquet.format.PageHeader;
+import org.apache.parquet.format.Util;
+import org.apache.parquet.format.converter.ParquetMetadataConverter;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
+import org.apache.parquet.hadoop.metadata.ColumnPath;
+import org.apache.parquet.hadoop.metadata.FileMetaData;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.hadoop.util.counters.BenchmarkCounter;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+import org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter;
+import org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore;
+import org.apache.parquet.internal.filter2.columnindex.RowRanges;
+import org.apache.parquet.io.InputFile;
+import org.apache.parquet.io.ParquetDecodingException;
+import org.apache.parquet.io.SeekableInputStream;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.spark.sql.execution.metric.SQLMetric;
+
+import static org.apache.parquet.hadoop.ParquetFileWriter.EFMAGIC;
+import static org.apache.parquet.hadoop.ParquetFileWriter.MAGIC;
+
+import static org.apache.comet.parquet.RowGroupFilter.FilterLevel.BLOOMFILTER;
+import static org.apache.comet.parquet.RowGroupFilter.FilterLevel.DICTIONARY;
+import static org.apache.comet.parquet.RowGroupFilter.FilterLevel.STATISTICS;
+
+/**
+ * A Parquet file reader. Mostly follows {@code ParquetFileReader} in {@code parquet-mr}, but with
+ * customizations & optimizations for Comet.
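+ *
+ * A rough usage sketch: construct a {@code FileReader}, optionally call
+ * {@code setRequestedSchema} with the projected columns, then call {@code readNextRowGroup()}
+ * (or {@code readNextFilteredRowGroup()}) until it returns null, and finally {@code close()}
+ * the reader.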
+ */
+public class FileReader implements Closeable {
+ private static final Logger LOG = LoggerFactory.getLogger(FileReader.class);
+
+ private final ParquetMetadataConverter converter;
+ protected final SeekableInputStream f;
+ private final InputFile file;
+ private final Map<String, SQLMetric> metrics;
+ private final Map<ColumnPath, ColumnDescriptor> paths = new HashMap<>();
+ private final FileMetaData fileMetaData; // may be null
+ private final List<BlockMetaData> blocks;
+ private final List<ColumnIndexReader> blockIndexStores;
+ private final List<RowRanges> blockRowRanges;
+ private final CRC32 crc;
+ private final ParquetMetadata footer;
+
+ /**
+ * Read configurations come from two options:
+ * - options: options defined & specified by the 'parquet-mr' library
+ * - cometOptions: Comet-specific options, for the features introduced in Comet's Parquet
+ *   implementation
+ */
+ private final ParquetReadOptions options;
+
+ private final ReadOptions cometOptions;
+
+ private int currentBlock = 0;
+ private RowGroupReader currentRowGroup = null;
+ private InternalFileDecryptor fileDecryptor;
+
+ public FileReader(InputFile file, ParquetReadOptions options, ReadOptions cometOptions)
+ throws IOException {
+ this(file, null, options, cometOptions, null);
+ }
+
+ public FileReader(
+ InputFile file,
+ ParquetReadOptions options,
+ ReadOptions cometOptions,
+ Map<String, SQLMetric> metrics)
+ throws IOException {
+ this(file, null, options, cometOptions, metrics);
+ }
+
+ public FileReader(
+ InputFile file,
+ ParquetMetadata footer,
+ ParquetReadOptions options,
+ ReadOptions cometOptions,
+ Map<String, SQLMetric> metrics)
+ throws IOException {
+ this.converter = new ParquetMetadataConverter(options);
+ this.file = file;
+ this.f = file.newStream();
+ this.options = options;
+ this.cometOptions = cometOptions;
+ this.metrics = metrics;
+ if (footer == null) {
+ try {
+ footer = readFooter(file, options, f, converter);
+ } catch (Exception e) {
+ // If reading the footer throws an exception in the constructor, the newly opened stream
+ // should be closed here; otherwise, there is no way to close it from the outside.
+ f.close();
+ throw e;
+ }
+ }
+ this.footer = footer;
+ this.fileMetaData = footer.getFileMetaData();
+ this.fileDecryptor = fileMetaData.getFileDecryptor(); // must be called before filterRowGroups!
+ if (null != fileDecryptor && fileDecryptor.plaintextFile()) {
+ this.fileDecryptor = null; // Plaintext file. No need for a decryptor.
+ }
+
+ this.blocks = filterRowGroups(footer.getBlocks());
+ this.blockIndexStores = listWithNulls(this.blocks.size());
+ this.blockRowRanges = listWithNulls(this.blocks.size());
+ for (ColumnDescriptor col : footer.getFileMetaData().getSchema().getColumns()) {
+ paths.put(ColumnPath.get(col.getPath()), col);
+ }
+ this.crc = options.usePageChecksumVerification() ? new CRC32() : null;
+ }
+
+ /** Returns the footer of the Parquet file being read. */
+ public ParquetMetadata getFooter() {
+ return this.footer;
+ }
+
+ /** Returns the metadata of the Parquet file being read. */
+ public FileMetaData getFileMetaData() {
+ return this.fileMetaData;
+ }
+
+ /** Returns the input stream of the Parquet file being read. */
+ public SeekableInputStream getInputStream() {
+ return this.f;
+ }
+
+ /** Returns the Parquet options for reading the file. */
+ public ParquetReadOptions getOptions() {
+ return this.options;
+ }
+
+ /** Returns all the row groups of this reader (after applying row group filtering). */
+ public List<BlockMetaData> getRowGroups() {
+ return blocks;
+ }
+
+ /** Sets the projected columns to be read later via {@link #readNextRowGroup()} */
+ public void setRequestedSchema(List<ColumnDescriptor> projection) {
+ paths.clear();
+ for (ColumnDescriptor col : projection) {
+ paths.put(ColumnPath.get(col.getPath()), col);
+ }
+ }
+
+ /**
+ * Gets the total number of records across all row groups (after applying row group filtering).
+ */
+ public long getRecordCount() {
+ long total = 0;
+ for (BlockMetaData block : blocks) {
+ total += block.getRowCount();
+ }
+ return total;
+ }
+
+ /**
+ * Gets the total number of records across all row groups (after applying both row group filtering
+ * and page-level column index filtering).
+ */
+ public long getFilteredRecordCount() {
+ if (!options.useColumnIndexFilter()
+ || !FilterCompat.isFilteringRequired(options.getRecordFilter())) {
+ return getRecordCount();
+ }
+ long total = 0;
+ for (int i = 0, n = blocks.size(); i < n; ++i) {
+ total += getRowRanges(i).rowCount();
+ }
+ return total;
+ }
+
+ /** Skips the next row group. Returns true on success, or false if there are no more row groups to skip. */
+ public boolean skipNextRowGroup() {
+ return advanceToNextBlock();
+ }
+
+ /**
+ * Returns the next row group to read (after applying row group filtering), or null if there are
+ * no more row groups.
+ */
+ public PageReadStore readNextRowGroup() throws IOException {
+ if (currentBlock == blocks.size()) {
+ return null;
+ }
+ BlockMetaData block = blocks.get(currentBlock);
+ if (block.getRowCount() == 0) {
+ throw new RuntimeException("Illegal row group of 0 rows");
+ }
+ this.currentRowGroup = new RowGroupReader(block.getRowCount());
+ // prepare the list of consecutive parts to read them in one scan
+ List<ConsecutivePartList> allParts = new ArrayList<>();
+ ConsecutivePartList currentParts = null;
+ for (ColumnChunkMetaData mc : block.getColumns()) {
+ ColumnPath pathKey = mc.getPath();
+ ColumnDescriptor columnDescriptor = paths.get(pathKey);
+ if (columnDescriptor != null) {
+ BenchmarkCounter.incrementTotalBytes(mc.getTotalSize());
+ long startingPos = mc.getStartingPos();
+ boolean mergeRanges = cometOptions.isIOMergeRangesEnabled();
+ int mergeRangeDelta = cometOptions.getIOMergeRangesDelta();
+
+ // start a new list if -
+ //   it is the first part, or
+ //   merging is disabled and the part is not consecutive with the previous one, or
+ //   merging is enabled but the gap from the previous part exceeds the merge range delta
+ if (currentParts == null
+ || (!mergeRanges && currentParts.endPos() != startingPos)
+ || (mergeRanges && startingPos - currentParts.endPos() > mergeRangeDelta)) {
+ currentParts = new ConsecutivePartList(startingPos);
+ allParts.add(currentParts);
+ }
+ // if we are in a consecutive part list and there is a gap in between the parts,
+ // we treat the gap as a skippable chunk
+ long delta = startingPos - currentParts.endPos();
+ if (mergeRanges && delta > 0 && delta <= mergeRangeDelta) {
+ // add a chunk that will be skipped because it has no column descriptor
+ currentParts.addChunk(new ChunkDescriptor(null, null, startingPos, delta));
+ }
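+ // For example (hypothetical sizes): with a merge range delta of 8 MB, two column chunks
+ // separated by a 1 MB gap are fetched in a single consecutive scan, and the gap itself is
+ // added as a chunk with no column descriptor so that it is skipped after the read.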
+ currentParts.addChunk(
+ new ChunkDescriptor(columnDescriptor, mc, startingPos, mc.getTotalSize()));
+ }
+ }
+ // actually read all the chunks
+ return readChunks(block, allParts, new ChunkListBuilder());
+ }
+
+ /**
+ * Returns the next row group to read (after applying both row group filtering and page-level
+ * column index filtering), or null if there are no more row groups.
+ */
+ public PageReadStore readNextFilteredRowGroup() throws IOException {
+ if (currentBlock == blocks.size()) {
+ return null;
+ }
+ if (!options.useColumnIndexFilter()
+ || !FilterCompat.isFilteringRequired(options.getRecordFilter())) {
+ return readNextRowGroup();
+ }
+ BlockMetaData block = blocks.get(currentBlock);
+ if (block.getRowCount() == 0) {
+ throw new RuntimeException("Illegal row group of 0 rows");
+ }
+ ColumnIndexStore ciStore = getColumnIndexReader(currentBlock);
+ RowRanges rowRanges = getRowRanges(currentBlock);
+ long rowCount = rowRanges.rowCount();
+ if (rowCount == 0) {
+ // There are no matching rows -> skipping this row-group
+ advanceToNextBlock();
+ return readNextFilteredRowGroup();
+ }
+ if (rowCount == block.getRowCount()) {
+ // All rows are matching -> fall back to the non-filtering path
+ return readNextRowGroup();
+ }
+
+ this.currentRowGroup = new RowGroupReader(rowRanges);
+ // prepare the list of consecutive parts to read them in one scan
+ ChunkListBuilder builder = new ChunkListBuilder();
+ List<ConsecutivePartList> allParts = new ArrayList<>();
+ ConsecutivePartList currentParts = null;
+ for (ColumnChunkMetaData mc : block.getColumns()) {
+ ColumnPath pathKey = mc.getPath();
+ ColumnDescriptor columnDescriptor = paths.get(pathKey);
+ if (columnDescriptor != null) {
+ OffsetIndex offsetIndex = ciStore.getOffsetIndex(mc.getPath());
+ IndexFilter indexFilter = new IndexFilter(rowRanges, offsetIndex, block.getRowCount());
+ OffsetIndex filteredOffsetIndex = indexFilter.filterOffsetIndex();
+ for (IndexFilter.OffsetRange range :
+ indexFilter.calculateOffsetRanges(filteredOffsetIndex, mc)) {
+ BenchmarkCounter.incrementTotalBytes(range.length);
+ long startingPos = range.offset;
+ // first part or not consecutive => new list
+ if (currentParts == null || currentParts.endPos() != startingPos) {
+ currentParts = new ConsecutivePartList(startingPos);
+ allParts.add(currentParts);
+ }
+ ChunkDescriptor chunkDescriptor =
+ new ChunkDescriptor(columnDescriptor, mc, startingPos, range.length);
+ currentParts.addChunk(chunkDescriptor);
+ builder.setOffsetIndex(chunkDescriptor, filteredOffsetIndex);
+ }
+ }
+ }
+ // actually read all the chunks
+ return readChunks(block, allParts, builder);
+ }
+
+ // Visible for testing
+ ColumnIndexReader getColumnIndexReader(int blockIndex) {
+ ColumnIndexReader ciStore = blockIndexStores.get(blockIndex);
+ if (ciStore == null) {
+ ciStore = ColumnIndexReader.create(blocks.get(blockIndex), paths.keySet(), fileDecryptor, f);
+ blockIndexStores.set(blockIndex, ciStore);
+ }
+ return ciStore;
+ }
+
+ private PageReadStore readChunks(
+ BlockMetaData block, List<ConsecutivePartList> allParts, ChunkListBuilder builder)
+ throws IOException {
+ for (ConsecutivePartList consecutiveChunks : allParts) {
+ if (shouldReadParallel()) {
+ consecutiveChunks.readAllParallel(builder);
+ } else {
+ consecutiveChunks.readAll(f, builder);
+ }
+ }
+ for (Chunk chunk : builder.build()) {
+ readChunkPages(chunk, block);
+ }
+
+ advanceToNextBlock();
+
+ return currentRowGroup;
+ }
+
+ private boolean shouldReadParallel() {
+ if (file instanceof CometInputFile) {
+ URI uri = ((CometInputFile) file).getPath().toUri();
+ return shouldReadParallel(cometOptions, uri.getScheme());
+ }
+
+ return false;
+ }
+
+ static boolean shouldReadParallel(ReadOptions options, String scheme) {
+ return options.isParallelIOEnabled() && shouldReadParallelForScheme(scheme);
+ }
+
+ private static boolean shouldReadParallelForScheme(String scheme) {
+ if (scheme == null) {
+ return false;
+ }
+
+ switch (scheme) {
+ case "s3a":
+ // Only enable parallel read for S3, so far.
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ private void readChunkPages(Chunk chunk, BlockMetaData block) throws IOException {
+ if (fileDecryptor == null || fileDecryptor.plaintextFile()) {
+ currentRowGroup.addColumn(chunk.descriptor.col, chunk.readAllPages());
+ return;
+ }
+ // Encrypted file
+ ColumnPath columnPath = ColumnPath.get(chunk.descriptor.col.getPath());
+ InternalColumnDecryptionSetup columnDecryptionSetup = fileDecryptor.getColumnSetup(columnPath);
+ if (!columnDecryptionSetup.isEncrypted()) { // plaintext column
+ currentRowGroup.addColumn(chunk.descriptor.col, chunk.readAllPages());
+ } else { // encrypted column
+ currentRowGroup.addColumn(
+ chunk.descriptor.col,
+ chunk.readAllPages(
+ columnDecryptionSetup.getMetaDataDecryptor(),
+ columnDecryptionSetup.getDataDecryptor(),
+ fileDecryptor.getFileAAD(),
+ block.getOrdinal(),
+ columnDecryptionSetup.getOrdinal()));
+ }
+ }
+
+ private boolean advanceToNextBlock() {
+ if (currentBlock == blocks.size()) {
+ return false;
+ }
+ // update the current block and instantiate a dictionary reader for it
+ ++currentBlock;
+ return true;
+ }
+
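+ // Returns a flattened array of (rowIndexOffset, rowCount) pairs, one pair per (filtered) row
+ // group: [offset0, count0, offset1, count1, ...].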
+ public long[] getRowIndices() {
+ long[] rowIndices = new long[blocks.size() * 2];
+ for (int i = 0, n = blocks.size(); i < n; i++) {
+ BlockMetaData block = blocks.get(i);
+ rowIndices[i * 2] = getRowIndexOffset(block);
+ rowIndices[i * 2 + 1] = block.getRowCount();
+ }
+ return rowIndices;
+ }
+
+ // Uses reflection to get row index offset from a Parquet block metadata.
+ //
+ // The reason reflection is used here is that some Spark versions still depend on a
+ // Parquet version where the method `getRowIndexOffset` is not public.
+ private long getRowIndexOffset(BlockMetaData metaData) {
+ try {
+ Method method = BlockMetaData.class.getMethod("getRowIndexOffset");
+ method.setAccessible(true);
+ return (long) method.invoke(metaData);
+ } catch (Exception e) {
+ throw new RuntimeException("Error when calling getRowIndexOffset", e);
+ }
+ }
+
+ private RowRanges getRowRanges(int blockIndex) {
+ Preconditions.checkState(
+ FilterCompat.isFilteringRequired(options.getRecordFilter()),
+ "Should not be invoked if filter is null or NOOP");
+ RowRanges rowRanges = blockRowRanges.get(blockIndex);
+ if (rowRanges == null) {
+ rowRanges =
+ ColumnIndexFilter.calculateRowRanges(
+ options.getRecordFilter(),
+ getColumnIndexReader(blockIndex),
+ paths.keySet(),
+ blocks.get(blockIndex).getRowCount());
+ blockRowRanges.set(blockIndex, rowRanges);
+ }
+ return rowRanges;
+ }
+
+ private static ParquetMetadata readFooter(
+ InputFile file,
+ ParquetReadOptions options,
+ SeekableInputStream f,
+ ParquetMetadataConverter converter)
+ throws IOException {
+ long fileLen = file.getLength();
+ String filePath = file.toString();
+ LOG.debug("File length {}", fileLen);
+
+ int FOOTER_LENGTH_SIZE = 4;
+
+ // MAGIC + data + footer + footerIndex + MAGIC
+ if (fileLen < MAGIC.length + FOOTER_LENGTH_SIZE + MAGIC.length) {
+ throw new RuntimeException(
+ filePath + " is not a Parquet file (length is too low: " + fileLen + ")");
+ }
+
+ // Read footer length and magic string - with a single seek
+ byte[] magic = new byte[MAGIC.length];
+ long fileMetadataLengthIndex = fileLen - magic.length - FOOTER_LENGTH_SIZE;
+ LOG.debug("reading footer index at {}", fileMetadataLengthIndex);
+ f.seek(fileMetadataLengthIndex);
+ int fileMetadataLength = BytesUtils.readIntLittleEndian(f);
+ f.readFully(magic);
+
+ boolean encryptedFooterMode;
+ if (Arrays.equals(MAGIC, magic)) {
+ encryptedFooterMode = false;
+ } else if (Arrays.equals(EFMAGIC, magic)) {
+ encryptedFooterMode = true;
+ } else {
+ throw new RuntimeException(
+ filePath
+ + " is not a Parquet file. Expected magic number "
+ + "at tail, but found "
+ + Arrays.toString(magic));
+ }
+
+ long fileMetadataIndex = fileMetadataLengthIndex - fileMetadataLength;
+ LOG.debug("read footer length: {}, footer index: {}", fileMetadataLength, fileMetadataIndex);
+ if (fileMetadataIndex < magic.length || fileMetadataIndex >= fileMetadataLengthIndex) {
+ throw new RuntimeException(
+ "corrupted file: the footer index is not within the file: " + fileMetadataIndex);
+ }
+ f.seek(fileMetadataIndex);
+
+ FileDecryptionProperties fileDecryptionProperties = options.getDecryptionProperties();
+ InternalFileDecryptor fileDecryptor = null;
+ if (null != fileDecryptionProperties) {
+ fileDecryptor = new InternalFileDecryptor(fileDecryptionProperties);
+ }
+
+ // Read all the footer bytes at once to avoid multiple read operations,
+ // since a single read operation can be pretty time-consuming in HDFS.
+ byte[] footerBytes = new byte[fileMetadataLength];
+ f.readFully(footerBytes);
+ ByteBuffer footerBytesBuffer = ByteBuffer.wrap(footerBytes);
+ LOG.debug("Finished reading all footer bytes.");
+ InputStream footerBytesStream = ByteBufferInputStream.wrap(footerBytesBuffer);
+
+ // Regular file, or encrypted file with plaintext footer
+ if (!encryptedFooterMode) {
+ return converter.readParquetMetadata(
+ footerBytesStream, options.getMetadataFilter(), fileDecryptor, false, fileMetadataLength);
+ }
+
+ // Encrypted file with encrypted footer
+ if (fileDecryptor == null) {
+ throw new ParquetCryptoRuntimeException(
+ "Trying to read file with encrypted footer. " + "No keys available");
+ }
+ FileCryptoMetaData fileCryptoMetaData = Util.readFileCryptoMetaData(footerBytesStream);
+ fileDecryptor.setFileCryptoMetaData(
+ fileCryptoMetaData.getEncryption_algorithm(), true, fileCryptoMetaData.getKey_metadata());
+ // footer length is required only for signed plaintext footers
+ return converter.readParquetMetadata(
+ footerBytesStream, options.getMetadataFilter(), fileDecryptor, true, 0);
+ }
+
+ private List<BlockMetaData> filterRowGroups(List<BlockMetaData> blocks) {
+ FilterCompat.Filter recordFilter = options.getRecordFilter();
+ if (FilterCompat.isFilteringRequired(recordFilter)) {
+ // set up data filters based on configured levels
+ List<RowGroupFilter.FilterLevel> levels = new ArrayList<>();
+
+ if (options.useStatsFilter()) {
+ levels.add(STATISTICS);
+ }
+
+ if (options.useDictionaryFilter()) {
+ levels.add(DICTIONARY);
+ }
+
+ if (options.useBloomFilter()) {
+ levels.add(BLOOMFILTER);
+ }
+ return RowGroupFilter.filterRowGroups(levels, recordFilter, blocks, this);
+ }
+
+ return blocks;
+ }
+
+ private static <T> List<T> listWithNulls(int size) {
+ return Stream.generate(() -> (T) null).limit(size).collect(Collectors.toList());
+ }
+
+ public void closeStream() throws IOException {
+ if (f != null) {
+ f.close();
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ try {
+ if (f != null) {
+ f.close();
+ }
+ } finally {
+ options.getCodecFactory().release();
+ }
+ }
+
+ /**
+ * Builder to concatenate the buffers of the discontinuous parts for the same column. These parts
+ * are generated as a result of the column-index based filtering when some pages might be skipped
+ * at reading.
+ */
+ private class ChunkListBuilder {
+ private class ChunkData {
+ final List<ByteBuffer> buffers = new ArrayList<>();
+ OffsetIndex offsetIndex;
+ }
+
+ private final Map<ChunkDescriptor, ChunkData> map = new HashMap<>();
+
+ void add(ChunkDescriptor descriptor, List<ByteBuffer> buffers) {
+ ChunkListBuilder.ChunkData data = map.get(descriptor);
+ if (data == null) {
+ data = new ChunkData();
+ map.put(descriptor, data);
+ }
+ data.buffers.addAll(buffers);
+ }
+
+ void setOffsetIndex(ChunkDescriptor descriptor, OffsetIndex offsetIndex) {
+ ChunkData data = map.get(descriptor);
+ if (data == null) {
+ data = new ChunkData();
+ map.put(descriptor, data);
+ }
+ data.offsetIndex = offsetIndex;
+ }
+
+ List<Chunk> build() {
+ List<Chunk> chunks = new ArrayList<>();
+ for (Map.Entry<ChunkDescriptor, ChunkData> entry : map.entrySet()) {
+ ChunkDescriptor descriptor = entry.getKey();
+ ChunkData data = entry.getValue();
+ chunks.add(new Chunk(descriptor, data.buffers, data.offsetIndex));
+ }
+ return chunks;
+ }
+ }
+
+ /** The data for a column chunk */
+ private class Chunk {
+ private final ChunkDescriptor descriptor;
+ private final ByteBufferInputStream stream;
+ final OffsetIndex offsetIndex;
+
+ /**
+ * @param descriptor descriptor for the chunk
+ * @param buffers ByteBuffers that contain the chunk
+ * @param offsetIndex the offset index for this column; might be null
+ */
+ Chunk(ChunkDescriptor descriptor, List<ByteBuffer> buffers, OffsetIndex offsetIndex) {
+ this.descriptor = descriptor;
+ this.stream = ByteBufferInputStream.wrap(buffers);
+ this.offsetIndex = offsetIndex;
+ }
+
+ protected PageHeader readPageHeader(BlockCipher.Decryptor blockDecryptor, byte[] pageHeaderAAD)
+ throws IOException {
+ return Util.readPageHeader(stream, blockDecryptor, pageHeaderAAD);
+ }
+
+ /**
+ * Calculate checksum of input bytes, throw decoding exception if it does not match the provided
+ * reference crc
+ */
+ private void verifyCrc(int referenceCrc, byte[] bytes, String exceptionMsg) {
+ crc.reset();
+ crc.update(bytes);
+ if (crc.getValue() != ((long) referenceCrc & 0xffffffffL)) {
+ throw new ParquetDecodingException(exceptionMsg);
+ }
+ }
+
+ private ColumnPageReader readAllPages() throws IOException {
+ return readAllPages(null, null, null, -1, -1);
+ }
+
+ private ColumnPageReader readAllPages(
+ BlockCipher.Decryptor headerBlockDecryptor,
+ BlockCipher.Decryptor pageBlockDecryptor,
+ byte[] aadPrefix,
+ int rowGroupOrdinal,
+ int columnOrdinal)
+ throws IOException {
+ List<DataPage> pagesInChunk = new ArrayList<>();
+ DictionaryPage dictionaryPage = null;
+ PrimitiveType type =
+ fileMetaData.getSchema().getType(descriptor.col.getPath()).asPrimitiveType();
+
+ long valuesCountReadSoFar = 0;
+ int dataPageCountReadSoFar = 0;
+ byte[] dataPageHeaderAAD = null;
+ if (null != headerBlockDecryptor) {
+ dataPageHeaderAAD =
+ AesCipher.createModuleAAD(
+ aadPrefix,
+ ModuleCipherFactory.ModuleType.DataPageHeader,
+ rowGroupOrdinal,
+ columnOrdinal,
+ getPageOrdinal(dataPageCountReadSoFar));
+ }
+ while (hasMorePages(valuesCountReadSoFar, dataPageCountReadSoFar)) {
+ byte[] pageHeaderAAD = dataPageHeaderAAD;
+ if (null != headerBlockDecryptor) {
+ // Important: this verifies file integrity (makes sure dictionary page had not been
+ // removed)
+ if (null == dictionaryPage && descriptor.metadata.hasDictionaryPage()) {
+ pageHeaderAAD =
+ AesCipher.createModuleAAD(
+ aadPrefix,
+ ModuleCipherFactory.ModuleType.DictionaryPageHeader,
+ rowGroupOrdinal,
+ columnOrdinal,
+ -1);
+ } else {
+ int pageOrdinal = getPageOrdinal(dataPageCountReadSoFar);
+ AesCipher.quickUpdatePageAAD(dataPageHeaderAAD, pageOrdinal);
+ }
+ }
+
+ PageHeader pageHeader = readPageHeader(headerBlockDecryptor, pageHeaderAAD);
+ int uncompressedPageSize = pageHeader.getUncompressed_page_size();
+ int compressedPageSize = pageHeader.getCompressed_page_size();
+ final BytesInput pageBytes;
+ switch (pageHeader.type) {
+ case DICTIONARY_PAGE:
+ // there is only one dictionary page per column chunk
+ if (dictionaryPage != null) {
+ throw new ParquetDecodingException(
+ "more than one dictionary page in column " + descriptor.col);
+ }
+ pageBytes = this.readAsBytesInput(compressedPageSize);
+ if (options.usePageChecksumVerification() && pageHeader.isSetCrc()) {
+ verifyCrc(
+ pageHeader.getCrc(),
+ pageBytes.toByteArray(),
+ "could not verify dictionary page integrity, CRC checksum verification failed");
+ }
+ DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header();
+ dictionaryPage =
+ new DictionaryPage(
+ pageBytes,
+ uncompressedPageSize,
+ dicHeader.getNum_values(),
+ converter.getEncoding(dicHeader.getEncoding()));
+ // Copy crc to new page, used for testing
+ if (pageHeader.isSetCrc()) {
+ dictionaryPage.setCrc(pageHeader.getCrc());
+ }
+ break;
+
+ case DATA_PAGE:
+ DataPageHeader dataHeaderV1 = pageHeader.getData_page_header();
+ pageBytes = this.readAsBytesInput(compressedPageSize);
+ if (options.usePageChecksumVerification() && pageHeader.isSetCrc()) {
+ verifyCrc(
+ pageHeader.getCrc(),
+ pageBytes.toByteArray(),
+ "could not verify page integrity, CRC checksum verification failed");
+ }
+ DataPageV1 dataPageV1 =
+ new DataPageV1(
+ pageBytes,
+ dataHeaderV1.getNum_values(),
+ uncompressedPageSize,
+ converter.fromParquetStatistics(
+ getFileMetaData().getCreatedBy(), dataHeaderV1.getStatistics(), type),
+ converter.getEncoding(dataHeaderV1.getRepetition_level_encoding()),
+ converter.getEncoding(dataHeaderV1.getDefinition_level_encoding()),
+ converter.getEncoding(dataHeaderV1.getEncoding()));
+ // Copy crc to new page, used for testing
+ if (pageHeader.isSetCrc()) {
+ dataPageV1.setCrc(pageHeader.getCrc());
+ }
+ pagesInChunk.add(dataPageV1);
+ valuesCountReadSoFar += dataHeaderV1.getNum_values();
+ ++dataPageCountReadSoFar;
+ break;
+
+ case DATA_PAGE_V2:
+ DataPageHeaderV2 dataHeaderV2 = pageHeader.getData_page_header_v2();
+ int dataSize =
+ compressedPageSize
+ - dataHeaderV2.getRepetition_levels_byte_length()
+ - dataHeaderV2.getDefinition_levels_byte_length();
+ pagesInChunk.add(
+ new DataPageV2(
+ dataHeaderV2.getNum_rows(),
+ dataHeaderV2.getNum_nulls(),
+ dataHeaderV2.getNum_values(),
+ this.readAsBytesInput(dataHeaderV2.getRepetition_levels_byte_length()),
+ this.readAsBytesInput(dataHeaderV2.getDefinition_levels_byte_length()),
+ converter.getEncoding(dataHeaderV2.getEncoding()),
+ this.readAsBytesInput(dataSize),
+ uncompressedPageSize,
+ converter.fromParquetStatistics(
+ getFileMetaData().getCreatedBy(), dataHeaderV2.getStatistics(), type),
+ dataHeaderV2.isIs_compressed()));
+ valuesCountReadSoFar += dataHeaderV2.getNum_values();
+ ++dataPageCountReadSoFar;
+ break;
+
+ default:
+ LOG.debug(
+ "skipping page of type {} of size {}", pageHeader.getType(), compressedPageSize);
+ stream.skipFully(compressedPageSize);
+ break;
+ }
+ }
+ if (offsetIndex == null && valuesCountReadSoFar != descriptor.metadata.getValueCount()) {
+ // Would be nice to have a CorruptParquetFileException or something as a subclass?
+ throw new IOException(
+ "Expected "
+ + descriptor.metadata.getValueCount()
+ + " values in column chunk at "
+ + file
+ + " offset "
+ + descriptor.metadata.getFirstDataPageOffset()
+ + " but got "
+ + valuesCountReadSoFar
+ + " values instead over "
+ + pagesInChunk.size()
+ + " pages ending at file offset "
+ + (descriptor.fileOffset + stream.position()));
+ }
+ CompressionCodecFactory.BytesInputDecompressor decompressor =
+ options.getCodecFactory().getDecompressor(descriptor.metadata.getCodec());
+ return new ColumnPageReader(
+ decompressor,
+ pagesInChunk,
+ dictionaryPage,
+ offsetIndex,
+ blocks.get(currentBlock).getRowCount(),
+ pageBlockDecryptor,
+ aadPrefix,
+ rowGroupOrdinal,
+ columnOrdinal);
+ }
+
+ private boolean hasMorePages(long valuesCountReadSoFar, int dataPageCountReadSoFar) {
+ return offsetIndex == null
+ ? valuesCountReadSoFar < descriptor.metadata.getValueCount()
+ : dataPageCountReadSoFar < offsetIndex.getPageCount();
+ }
+
+ private int getPageOrdinal(int dataPageCountReadSoFar) {
+ if (null == offsetIndex) {
+ return dataPageCountReadSoFar;
+ }
+
+ return offsetIndex.getPageOrdinal(dataPageCountReadSoFar);
+ }
+
+ /**
+ * @param size the size of the page
+ * @return the page
+ * @throws IOException if there is an error while reading from the file stream
+ */
+ public BytesInput readAsBytesInput(int size) throws IOException {
+ return BytesInput.from(stream.sliceBuffers(size));
+ }
+ }
+
+ /**
+ * Describes a list of consecutive parts to be read at once. A consecutive part may contain whole
+ * column chunks or only parts of them (some pages).
+ */
+ private class ConsecutivePartList {
+ private final long offset;
+ private final List<ChunkDescriptor> chunks = new ArrayList<>();
+ private long length;
+ private final SQLMetric fileReadTimeMetric;
+ private final SQLMetric fileReadSizeMetric;
+ private final SQLMetric readThroughput;
+
+ /**
+ * @param offset where the first chunk starts
+ */
+ ConsecutivePartList(long offset) {
+ if (metrics != null) {
+ this.fileReadTimeMetric = metrics.get("ParquetInputFileReadTime");
+ this.fileReadSizeMetric = metrics.get("ParquetInputFileReadSize");
+ this.readThroughput = metrics.get("ParquetInputFileReadThroughput");
+ } else {
+ this.fileReadTimeMetric = null;
+ this.fileReadSizeMetric = null;
+ this.readThroughput = null;
+ }
+ this.offset = offset;
+ }
+
+ /**
+ * Adds a chunk to the list. It must be consecutive to the previous chunk.
+ *
+ * @param descriptor a chunk descriptor
+ */
+ public void addChunk(ChunkDescriptor descriptor) {
+ chunks.add(descriptor);
+ length += descriptor.size;
+ }
+
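+ // Splits the `length` bytes of this consecutive range into read buffers, each capped at the
+ // configured max allocation size, plus one smaller trailing buffer for any remainder.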
+ private List<ByteBuffer> allocateReadBuffers() {
+ int fullAllocations = Math.toIntExact(length / options.getMaxAllocationSize());
+ int lastAllocationSize = Math.toIntExact(length % options.getMaxAllocationSize());
+
+ int numAllocations = fullAllocations + (lastAllocationSize > 0 ? 1 : 0);
+ List<ByteBuffer> buffers = new ArrayList<>(numAllocations);
+
+ for (int i = 0; i < fullAllocations; i += 1) {
+ buffers.add(options.getAllocator().allocate(options.getMaxAllocationSize()));
+ }
+
+ if (lastAllocationSize > 0) {
+ buffers.add(options.getAllocator().allocate(lastAllocationSize));
+ }
+ return buffers;
+ }
+
+ /**
+ * @param f file to read the chunks from
+ * @param builder used to build chunk list to read the pages for the different columns
+ * @throws IOException if there is an error while reading from the stream
+ */
+ public void readAll(SeekableInputStream f, ChunkListBuilder builder) throws IOException {
+ f.seek(offset);
+
+ List<ByteBuffer> buffers = allocateReadBuffers();
+ long startNs = System.nanoTime();
+
+ for (ByteBuffer buffer : buffers) {
+ f.readFully(buffer);
+ buffer.flip();
+ }
+ setReadMetrics(startNs);
+
+ // report in a counter the data we just scanned
+ BenchmarkCounter.incrementBytesRead(length);
+ ByteBufferInputStream stream = ByteBufferInputStream.wrap(buffers);
+ for (int i = 0; i < chunks.size(); i++) {
+ ChunkDescriptor descriptor = chunks.get(i);
+ if (descriptor.col != null) {
+ builder.add(descriptor, stream.sliceBuffers(descriptor.size));
+ } else {
+ stream.skipFully(descriptor.size);
+ }
+ }
+ }
+
+ /**
+ * API to read a consecutive range from the Parquet file in parallel. This is identical to
+ * {@link #readAll(SeekableInputStream, ChunkListBuilder) readAll}, except that the consecutive
+ * range is split into multiple smaller ranges and read in parallel. The parallelism can be set
+ * by specifying the threadpool size via {@link
+ * ReadOptions.Builder#withParallelIOThreadPoolSize(int)}.
+ *
+ * @param builder used to build chunk list to read the pages for the different columns
+ * @throws IOException if there is an error while reading from the stream
+ */
+ public void readAllParallel(ChunkListBuilder builder) throws IOException {
+
+ List<ByteBuffer> buffers = allocateReadBuffers();
+ long startNs = System.nanoTime();
+
+ int nThreads = cometOptions.parallelIOThreadPoolSize();
+ ExecutorService threadPool = CometFileReaderThreadPool.getOrCreateThreadPool(nThreads);
+ List<Future<Void>> futures = new ArrayList<>();
+
+ long currentOffset = this.offset;
+ int buffersPerThread = buffers.size() / nThreads;
+ int remaining = buffers.size() % nThreads;
+ // offset in input file each thread seeks to before beginning read
+ long[] offsets = new long[nThreads];
+ // index of buffer where each thread will start writing data
+ int[] bufferIndexes = new int[nThreads];
+ // number of buffers for each thread to fill
+ int[] numBuffers = new int[nThreads];
+
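+ // Distribute the buffers across threads: each thread gets `buffersPerThread` buffers, and the
+ // first `remaining` threads get one extra (e.g. 10 buffers over 4 threads gives 3, 3, 2, 2).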
+ int bufferNum = 0;
+ for (int i = 0; i < nThreads; i++) {
+ int nBuffers = 0;
+ offsets[i] = currentOffset;
+ bufferIndexes[i] = bufferNum;
+ nBuffers = buffersPerThread;
+ for (int j = 0; j < buffersPerThread; j++) {
+ currentOffset += buffers.get(bufferNum).capacity();
+ bufferNum++;
+ }
+ if (remaining > 0) {
+ remaining--;
+ currentOffset += buffers.get(bufferNum).capacity();
+ bufferNum++;
+ nBuffers++;
+ }
+ numBuffers[i] = nBuffers;
+ }
+ for (int n = 0; n < nThreads; n++) {
+ int threadIndex = n;
+ long pos = offsets[threadIndex];
+ int bufferIndex = bufferIndexes[threadIndex];
+ int nBuffers = numBuffers[threadIndex];
+ if (nBuffers == 0) {
+ continue;
+ }
+
+ // Find the total number of bytes to read for the current thread
+ long tmp = 0;
+ for (int i = 0; i < nBuffers; i++) {
+ int bufNo = bufferIndex + i;
+ if (bufNo >= buffers.size()) break;
+ tmp += buffers.get(bufNo).capacity();
+ }
+ final long length = tmp;
+
+ futures.add(
+ threadPool.submit(
+ () -> {
+ SeekableInputStream inputStream = null;
+ try {
+ if (file instanceof CometInputFile) {
+ inputStream = (((CometInputFile) file).newStream(pos, length));
+ } else {
+ inputStream = file.newStream();
+ }
+
+ inputStream.seek(pos);
+ long curPos = pos;
+ for (int i = 0; i < nBuffers; i++) {
+ int bufNo = bufferIndex + i;
+ if (bufNo >= buffers.size()) {
+ break;
+ }
+ ByteBuffer buffer = buffers.get(bufNo);
+ LOG.debug(
+ "Thread: {} Offset: {} Buffer: {} Size: {}",
+ threadIndex,
+ curPos,
+ bufNo,
+ buffer.capacity());
+ curPos += buffer.capacity();
+ inputStream.readFully(buffer);
+ buffer.flip();
+ } // for
+ } finally {
+ if (inputStream != null) {
+ inputStream.close();
+ }
+ }
+
+ return null;
+ }));
+ }
+
+ for (Future<Void> future : futures) {
+ try {
+ future.get();
+ } catch (InterruptedException | ExecutionException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ setReadMetrics(startNs);
+
+ ByteBufferInputStream stream = ByteBufferInputStream.wrap(buffers);
+ // report in a counter the data we just scanned
+ BenchmarkCounter.incrementBytesRead(length);
+ for (int i = 0; i < chunks.size(); i++) {
+ ChunkDescriptor descriptor = chunks.get(i);
+ if (descriptor.col != null) {
+ builder.add(descriptor, stream.sliceBuffers(descriptor.size));
+ } else {
+ stream.skipFully(descriptor.size);
+ }
+ }
+ }
+
+ private void setReadMetrics(long startNs) {
+ long totalFileReadTimeNs = System.nanoTime() - startNs;
+ double sizeInMb = ((double) length) / (1024 * 1024);
+ double timeInSec = ((double) totalFileReadTimeNs) / 1_000_000_000L;
+ double throughput = sizeInMb / timeInSec;
+ LOG.debug(
+ "Comet: File Read stats: Length: {} MB, Time: {} secs, throughput: {} MB/sec ",
+ sizeInMb,
+ timeInSec,
+ throughput);
+ if (fileReadTimeMetric != null) {
+ fileReadTimeMetric.add(totalFileReadTimeNs);
+ }
+ if (fileReadSizeMetric != null) {
+ fileReadSizeMetric.add(length);
+ }
+ if (readThroughput != null) {
+ readThroughput.set(throughput);
+ }
+ }
+
+ /**
+ * @return the position following the last byte of these chunks
+ */
+ public long endPos() {
+ return offset + length;
+ }
+ }
+
+ /** Information needed to read a column chunk or a part of it. */
+ private static class ChunkDescriptor {
+ private final ColumnDescriptor col;
+ private final ColumnChunkMetaData metadata;
+ private final long fileOffset;
+ private final long size;
+
+ /**
+ * @param col column this chunk is part of
+ * @param metadata metadata for the column
+ * @param fileOffset offset in the file where this chunk starts
+ * @param size size of the chunk
+ */
+ ChunkDescriptor(
+ ColumnDescriptor col, ColumnChunkMetaData metadata, long fileOffset, long size) {
+ this.col = col;
+ this.metadata = metadata;
+ this.fileOffset = fileOffset;
+ this.size = size;
+ }
+
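+ // Note: equality is intentionally based only on the column descriptor, so that multiple parts
+ // of the same column chunk map to a single entry in ChunkListBuilder and their buffers get
+ // concatenated.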
+ @Override
+ public int hashCode() {
+ return col.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ } else if (obj instanceof ChunkDescriptor) {
+ return col.equals(((ChunkDescriptor) obj).col);
+ } else {
+ return false;
+ }
+ }
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/FooterReader.java b/common/src/main/java/org/apache/comet/parquet/FooterReader.java
new file mode 100644
index 000000000..092eb442c
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/FooterReader.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.HadoopReadOptions;
+import org.apache.parquet.ParquetReadOptions;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.spark.sql.execution.datasources.PartitionedFile;
+
+/**
+ * Copied from Spark's `ParquetFooterReader` in order to avoid shading issues around Parquet.
+ *
+ * `FooterReader` is a utility class that encapsulates helper methods for reading the Parquet
+ * file footer.
+ */
+public class FooterReader {
+ public static ParquetMetadata readFooter(Configuration configuration, PartitionedFile file)
+ throws IOException, URISyntaxException {
+ long start = file.start();
+ long length = file.length();
+ Path filePath = new Path(new URI(file.filePath().toString()));
+ CometInputFile inputFile = CometInputFile.fromPath(filePath, configuration);
+ ParquetReadOptions readOptions =
+ HadoopReadOptions.builder(inputFile.getConf(), inputFile.getPath())
+ .withRange(start, start + length)
+ .build();
+ ReadOptions cometReadOptions = ReadOptions.builder(configuration).build();
+ // Use try-with-resources to ensure fd is closed.
+ try (FileReader fileReader = new FileReader(inputFile, readOptions, cometReadOptions)) {
+ return fileReader.getFooter();
+ }
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/IndexFilter.java b/common/src/main/java/org/apache/comet/parquet/IndexFilter.java
new file mode 100644
index 000000000..afa5687ca
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/IndexFilter.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+import org.apache.parquet.internal.filter2.columnindex.RowRanges;
+
+public class IndexFilter {
+ private final RowRanges rowRanges;
+ private final OffsetIndex offsetIndex;
+ private final long totalRowCount;
+
+ public IndexFilter(RowRanges rowRanges, OffsetIndex offsetIndex, long totalRowCount) {
+ this.rowRanges = rowRanges;
+ this.offsetIndex = offsetIndex;
+ this.totalRowCount = totalRowCount;
+ }
+
+ OffsetIndex filterOffsetIndex() {
+ List<Integer> indexMap = new ArrayList<>();
+ for (int i = 0, n = offsetIndex.getPageCount(); i < n; ++i) {
+ long from = offsetIndex.getFirstRowIndex(i);
+ if (rowRanges.isOverlapping(from, offsetIndex.getLastRowIndex(i, totalRowCount))) {
+ indexMap.add(i);
+ }
+ }
+
+ int[] indexArray = new int[indexMap.size()];
+ for (int i = 0; i < indexArray.length; i++) {
+ indexArray[i] = indexMap.get(i);
+ }
+ return new FilteredOffsetIndex(offsetIndex, indexArray);
+ }
+
+ List<OffsetRange> calculateOffsetRanges(OffsetIndex filteredOffsetIndex, ColumnChunkMetaData cm) {
+ List<OffsetRange> ranges = new ArrayList<>();
+ long firstPageOffset = offsetIndex.getOffset(0);
+ int n = filteredOffsetIndex.getPageCount();
+
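+ // Merge the byte ranges of consecutive surviving pages (plus the dictionary page, if it sits
+ // before the first data page) so that the column chunk can be read with as few seeks as
+ // possible.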
+ if (n > 0) {
+ OffsetRange currentRange = null;
+
+ // Add a range for the dictionary page if required
+ long rowGroupOffset = cm.getStartingPos();
+ if (rowGroupOffset < firstPageOffset) {
+ currentRange = new OffsetRange(rowGroupOffset, (int) (firstPageOffset - rowGroupOffset));
+ ranges.add(currentRange);
+ }
+
+ for (int i = 0; i < n; ++i) {
+ long offset = filteredOffsetIndex.getOffset(i);
+ int length = filteredOffsetIndex.getCompressedPageSize(i);
+ if (currentRange == null || !currentRange.extend(offset, length)) {
+ currentRange = new OffsetRange(offset, length);
+ ranges.add(currentRange);
+ }
+ }
+ }
+ return ranges;
+ }
+
+ private static class FilteredOffsetIndex implements OffsetIndex {
+ private final OffsetIndex offsetIndex;
+ private final int[] indexMap;
+
+ private FilteredOffsetIndex(OffsetIndex offsetIndex, int[] indexMap) {
+ this.offsetIndex = offsetIndex;
+ this.indexMap = indexMap;
+ }
+
+ @Override
+ public int getPageOrdinal(int pageIndex) {
+ return indexMap[pageIndex];
+ }
+
+ @Override
+ public int getPageCount() {
+ return indexMap.length;
+ }
+
+ @Override
+ public long getOffset(int pageIndex) {
+ return offsetIndex.getOffset(indexMap[pageIndex]);
+ }
+
+ @Override
+ public int getCompressedPageSize(int pageIndex) {
+ return offsetIndex.getCompressedPageSize(indexMap[pageIndex]);
+ }
+
+ @Override
+ public long getFirstRowIndex(int pageIndex) {
+ return offsetIndex.getFirstRowIndex(indexMap[pageIndex]);
+ }
+
+ @Override
+ public long getLastRowIndex(int pageIndex, long totalRowCount) {
+ int nextIndex = indexMap[pageIndex] + 1;
+ return (nextIndex >= offsetIndex.getPageCount()
+ ? totalRowCount
+ : offsetIndex.getFirstRowIndex(nextIndex))
+ - 1;
+ }
+ }
+
+ static class OffsetRange {
+ final long offset;
+ long length;
+
+ private OffsetRange(long offset, int length) {
+ this.offset = offset;
+ this.length = length;
+ }
+
+ private boolean extend(long offset, int length) {
+ if (this.offset + this.length == offset) {
+ this.length += length;
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/LazyColumnReader.java b/common/src/main/java/org/apache/comet/parquet/LazyColumnReader.java
new file mode 100644
index 000000000..a15d84192
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/LazyColumnReader.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.io.IOException;
+
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.page.PageReader;
+import org.apache.spark.sql.types.DataType;
+
+import org.apache.comet.vector.CometLazyVector;
+import org.apache.comet.vector.CometVector;
+
+public class LazyColumnReader extends ColumnReader {
+
+ // Remember the largest skipped index for sanity checking.
+ private int lastSkippedRowId = Integer.MAX_VALUE;
+
+ // Track whether the underlying page is drained.
+ private boolean isPageDrained = true;
+
+ // Number of leftover rows from the previous batch that have not been skipped yet.
+ private int numRowsToSkipFromPrevBatch;
+
+ // The lazy vector being updated.
+ private final CometLazyVector vector;
+
+ public LazyColumnReader(
+ DataType sparkReadType,
+ ColumnDescriptor descriptor,
+ int batchSize,
+ boolean useDecimal128,
+ boolean useLegacyDateTimestamp) {
+ super(sparkReadType, descriptor, batchSize, useDecimal128, useLegacyDateTimestamp);
+ this.batchSize = 0; // the batch size is set later in `readBatch`
+ this.vector = new CometLazyVector(sparkReadType, this, useDecimal128);
+ }
+
+ @Override
+ public void setPageReader(PageReader pageReader) throws IOException {
+ super.setPageReader(pageReader);
+ lastSkippedRowId = Integer.MAX_VALUE;
+ isPageDrained = true;
+ numRowsToSkipFromPrevBatch = 0;
+ currentNumValues = batchSize;
+ }
+
+ /**
+ * Lazily reads a batch of 'total' rows for this column. This includes: 1) skipping any unused
+ * rows from the previous batch, 2) resetting the native columnar batch, and 3) resetting the
+ * tracking variables.
+ *
+ * @param total the number of rows in the batch. MUST be <= the number of rows available in this
+ * column chunk.
+ */
+ @Override
+ public void readBatch(int total) {
+ // Before starting a new batch, take care of the remaining rows to skip from the previous batch.
+ tryPageSkip(batchSize);
+ numRowsToSkipFromPrevBatch += batchSize - currentNumValues;
+
+ // Now first reset the current columnar batch so that it can be used to fill in a new batch
+ // of values. Then, keep reading more data pages (via 'readBatch') until the current batch is
+ // full, or we have read 'total' number of values.
+ Native.resetBatch(nativeHandle);
+
+ batchSize = total;
+ currentNumValues = 0;
+ lastSkippedRowId = -1;
+ }
+
+ @Override
+ public CometVector currentBatch() {
+ return vector;
+ }
+
+ /** Reads all rows up to `batchSize`. Expects that no rows have been skipped so far. */
+ public void readAllBatch() {
+ // All rows should be read without any skips so far
+ assert (lastSkippedRowId == -1);
+
+ readBatch(batchSize - 1, 0);
+ }
+
+ /**
+ * Reads at least up to `rowId`. It may read beyond `rowId` if enough rows are available in the
+ * page, and it may skip reading rows before `rowId`. If `rowId` has already been read, it returns
+ * immediately.
+ *
+ * @param rowId the row index in the batch to read.
+ * @return true if `rowId` is newly materialized, or false if `rowId` is already materialized.
+ */
+ public boolean materializeUpToIfNecessary(int rowId) {
+ // Reading `rowId` is not allowed if it may have been skipped previously.
+ assert (rowId > lastSkippedRowId);
+
+ // If `rowId` is already materialized, return immediately.
+ if (rowId < currentNumValues) return false;
+
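+ // First skip whole pages up to `rowId` where possible, then materialize the remaining values;
+ // rows covered by whole-page skips are padded as nulls by `readBatch`.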
+ int numRowsWholePageSkipped = tryPageSkip(rowId);
+ readBatch(rowId, numRowsWholePageSkipped);
+ return true;
+ }
+
+ /**
+ * Reads up to `rowId` (inclusive). If whole pages were skipped previously in `tryPageSkip()`,
+ * pads the underlying vector with nulls for those whole-page-skipped rows before reading.
+ *
+ * @param rowId the row index in the batch to read.
+ * @param numNullRowsToPad the number of nulls to pad before reading.
+ */
+ private void readBatch(int rowId, int numNullRowsToPad) {
+ if (numRowsToSkipFromPrevBatch > 0) {
+ // Reaches here only when starting a new batch and the page is previously drained
+ readPage();
+ isPageDrained = false;
+ Native.skipBatch(nativeHandle, numRowsToSkipFromPrevBatch, true);
+ numRowsToSkipFromPrevBatch = 0;
+ }
+ while (rowId >= currentNumValues) {
+ int numRowsToRead = batchSize - currentNumValues;
+ if (isPageDrained) {
+ readPage();
+ }
+ int[] array = Native.readBatch(nativeHandle, numRowsToRead, numNullRowsToPad);
+ int read = array[0];
+ isPageDrained = read < numRowsToRead;
+ currentNumValues += read;
+ currentNumNulls += array[1];
+ // No need to update numNullRowsToPad. numNullRowsToPad > 0 means there were whole page skips.
+ // That guarantees that the Native.readBatch can read up to rowId in the current page.
+ }
+ }
+
+ /**
+ * Try to skip until `rowId` (exclusive). If possible, it skips whole underlying pages without
+ * decompressing. In that case, it returns early at the page end, so that the next iteration can
+ * lazily decide to `readPage()` or `tryPageSkip()` again.
+ *
+ * @param rowId the row index in the batch that it tries to skip up until (exclusive).
+ * @return the number of rows skipped via whole-page skips.
+ */
+ private int tryPageSkip(int rowId) {
+ int total = rowId - currentNumValues;
+ int wholePageSkipped = 0;
+ if (total > 0) {
+ // First try to skip from the non-drained underlying page.
+ int skipped = isPageDrained ? 0 : Native.skipBatch(nativeHandle, total);
+ total -= skipped;
+ isPageDrained = total > 0;
+ if (isPageDrained) {
+ ColumnPageReader columnPageReader = (ColumnPageReader) pageReader;
+ // It is always `columnPageReader.getPageValueCount() > numRowsToSkipFromPrevBatch`
+ int pageValueCount = columnPageReader.getPageValueCount() - numRowsToSkipFromPrevBatch;
+ while (pageValueCount <= total) {
+ // skip the entire page if the next page is small enough
+ columnPageReader.skipPage();
+ numRowsToSkipFromPrevBatch = 0;
+ total -= pageValueCount;
+ wholePageSkipped += pageValueCount;
+ pageValueCount = columnPageReader.getPageValueCount();
+ }
+ }
+
+ currentNumValues += skipped + wholePageSkipped;
+ currentNumNulls += skipped;
+ lastSkippedRowId = currentNumValues - 1;
+ }
+ return wholePageSkipped;
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/MetadataColumnReader.java b/common/src/main/java/org/apache/comet/parquet/MetadataColumnReader.java
new file mode 100644
index 000000000..b8722ca78
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/MetadataColumnReader.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import org.apache.arrow.c.ArrowArray;
+import org.apache.arrow.c.ArrowSchema;
+import org.apache.arrow.c.Data;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.spark.sql.types.DataType;
+
+import org.apache.comet.vector.CometPlainVector;
+import org.apache.comet.vector.CometVector;
+
+/** A metadata column reader that can be extended by {@link RowIndexColumnReader} etc. */
+public class MetadataColumnReader extends AbstractColumnReader {
+ private final BufferAllocator allocator = new RootAllocator();
+ private CometVector vector;
+
+ public MetadataColumnReader(DataType type, ColumnDescriptor descriptor, boolean useDecimal128) {
+ // TODO: should we handle legacy dates & timestamps for metadata columns?
+ super(type, descriptor, useDecimal128, false);
+ }
+
+ @Override
+ public void setBatchSize(int batchSize) {
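+    // Drop the cached vector; it will be re-imported from the native side on the next readBatch().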
+ close();
+ super.setBatchSize(batchSize);
+ }
+
+ @Override
+ public void readBatch(int total) {
+ if (vector == null) {
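+      // Import the vector exported by the native reader via the Arrow C Data Interface and cache
+      // it; later calls only need to update its value count.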
+ long[] addresses = Native.currentBatch(nativeHandle);
+ try (ArrowArray array = ArrowArray.wrap(addresses[0]);
+ ArrowSchema schema = ArrowSchema.wrap(addresses[1])) {
+ FieldVector fieldVector = Data.importVector(allocator, array, schema, null);
+ vector = new CometPlainVector(fieldVector, useDecimal128);
+ }
+ }
+ vector.setNumValues(total);
+ }
+
+ void setNumNulls(int total) {
+ vector.setNumNulls(total);
+ }
+
+ @Override
+ public CometVector currentBatch() {
+ return vector;
+ }
+
+ @Override
+ public void close() {
+ if (vector != null) {
+ vector.close();
+ vector = null;
+ }
+ super.close();
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/Native.java b/common/src/main/java/org/apache/comet/parquet/Native.java
new file mode 100644
index 000000000..0887ae12f
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/Native.java
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.nio.ByteBuffer;
+
+import org.apache.comet.NativeBase;
+
+public final class Native extends NativeBase {
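+  /** Convenience overload of {@link #readBatch(long, int, int)} that pads no nulls. */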
+ public static int[] readBatch(long handle, int batchSize) {
+ return readBatch(handle, batchSize, 0);
+ }
+
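+  /** Convenience overload of {@link #skipBatch(long, int, boolean)} that does not discard skipped rows. */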
+ public static int skipBatch(long handle, int batchSize) {
+ return skipBatch(handle, batchSize, false);
+ }
+
+ /** Native APIs * */
+
+ /**
+ * Creates a reader for a primitive Parquet column.
+ *
+ * @param physicalTypeId id for Parquet physical type
+ * @param logicalTypeId id for Parquet logical type
+ * @param expectedPhysicalTypeId id for Parquet physical type, converted from Spark read type.
+ * This is used for type promotion.
+ * @param path the path from the root schema to the column, derived from the method
+ * 'ColumnDescriptor#getPath()'.
+ * @param maxDl the maximum definition level of the primitive column
+ * @param maxRl the maximum repetition level of the primitive column
+ * @param bitWidth (only set when logical type is INT) the bit width for the integer type (INT8,
+ * INT16, INT32, etc)
+ * @param isSigned (only set when logical type is INT) whether it is signed or unsigned int.
+ * @param typeLength number of bytes required to store a value of the type, only set when the
+ * physical type is FIXED_LEN_BYTE_ARRAY, otherwise it's 0.
+ * @param precision (only set when logical type is DECIMAL) precision of the decimal type
+ * @param expectedPrecision (only set when logical type is DECIMAL) precision of the decimal type
+ * from Spark read schema. This is used for type promotion.
+ * @param scale (only set when logical type is DECIMAL) scale of the decimal type
+ * @param tu (only set when logical type is TIMESTAMP) unit for the timestamp
+ * @param isAdjustedUtc (only set when logical type is TIMESTAMP) whether the timestamp is
+ * adjusted to UTC or not
+ * @param batchSize the batch size for the columnar read
+ * @param useDecimal128 whether to always return 128 bit decimal regardless of precision
+ * @param useLegacyDateTimestampOrNTZ whether to read legacy dates/timestamps as it is
+ * @return a pointer to a native Parquet column reader created
+ */
+ public static native long initColumnReader(
+ int physicalTypeId,
+ int logicalTypeId,
+ int expectedPhysicalTypeId,
+ String[] path,
+ int maxDl,
+ int maxRl,
+ int bitWidth,
+ boolean isSigned,
+ int typeLength,
+ int precision,
+ int expectedPrecision,
+ int scale,
+ int tu,
+ boolean isAdjustedUtc,
+ int batchSize,
+ boolean useDecimal128,
+ boolean useLegacyDateTimestampOrNTZ);
+
+ /**
+ * Pass a Parquet dictionary page to the native column reader. Note this should only be called
+ * once per Parquet column chunk. Otherwise it'll panic.
+ *
+ * @param handle the handle to the native Parquet column reader
+ * @param dictionaryValueCount the number of values in this dictionary
+ * @param dictionaryData the actual dictionary page data, including repetition/definition levels
+ * as well as values
+ * @param encoding the encoding used by the dictionary
+ */
+ public static native void setDictionaryPage(
+ long handle, int dictionaryValueCount, byte[] dictionaryData, int encoding);
+
+ /**
+ * Passes a Parquet data page V1 to the native column reader.
+ *
+ * @param handle the handle to the native Parquet column reader
+ * @param pageValueCount the number of values in this data page
+ * @param pageData the actual page data, which should only contain PLAIN-encoded values.
+ * @param valueEncoding the encoding used by the values
+ */
+ public static native void setPageV1(
+ long handle, int pageValueCount, byte[] pageData, int valueEncoding);
+
+ /**
+ * Passes a Parquet data page V1 to the native column reader.
+ *
+ * @param handle the handle to the native Parquet column reader
+ * @param pageValueCount the number of values in this data page
+ * @param buffer the actual page data, represented by a DirectByteBuffer.
+ * @param valueEncoding the encoding used by the values
+ */
+ public static native void setPageBufferV1(
+ long handle, int pageValueCount, ByteBuffer buffer, int valueEncoding);
+
+ /**
+ * Passes a Parquet data page V2 to the native column reader.
+ *
+ * @param handle the handle to the native Parquet column reader
+ * @param pageValueCount the number of values in this data page
+ * @param defLevelData the data for definition levels
+ * @param repLevelData the data for repetition levels
+ * @param valueData the data for values
+ * @param valueEncoding the encoding used by the values
+ */
+ public static native void setPageV2(
+ long handle,
+ int pageValueCount,
+ byte[] defLevelData,
+ byte[] repLevelData,
+ byte[] valueData,
+ int valueEncoding);
+
+ /**
+ * Reset the current columnar batch. This will clear all the content of the batch as well as any
+ * internal state such as the current offset.
+ *
+ * @param handle the handle to the native Parquet column reader
+ */
+ public static native void resetBatch(long handle);
+
+ /**
+ * Reads at most 'batchSize' number of rows from the native Parquet column reader. Returns a tuple
+ * where the first element is the actual number of rows read (including both nulls and non-nulls),
+ * and the second element is the number of nulls read.
+ *
+ * If the returned value is < 'batchSize' then it means the current page has been completely
+ * drained. In this case, the caller should call {@link Native#setPageV1} or {@link
+ * Native#setPageV2} before the next 'readBatch' call.
+ *
+   * Note that the current page could also be drained if the returned value = 'batchSize', i.e.,
+ * the remaining number of rows in the page is exactly equal to 'batchSize'. In this case, the
+ * next 'readBatch' call will return 0 and the caller should call {@link Native#setPageV1} or
+ * {@link Native#setPageV2} next.
+   *
+   * If `nullPadSize` > 0, it pads nulls into the underlying vector before the values are read
+   * into it.
+ *
+ * @param handle the handle to the native Parquet column reader
+ * @param batchSize the number of rows to be read
+ * @param nullPadSize the number of nulls to pad before reading.
+ * @return a tuple: (the actual number of rows read, the number of nulls read)
+ */
+ public static native int[] readBatch(long handle, int batchSize, int nullPadSize);
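+
+  // Illustrative call pattern (an informal sketch, not part of the API contract): once a page has
+  // been fed via setPageV1/setPageV2, a caller could do:
+  //   int[] r = Native.readBatch(handle, batchSize, 0);
+  //   int read = r[0], nulls = r[1];
+  //   if (read < batchSize) { /* page drained: feed the next page before reading again */ }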
+
+ /**
+ * Skips at most 'batchSize' number of rows from the native Parquet column reader, and returns the
+ * actual number of rows skipped.
+ *
+   *
+   * If the returned value is < 'batchSize' then it means the current page has been completely
+ * drained. In this case, the caller should call {@link Native#setPageV1} or {@link
+ * Native#setPageV2} before the next 'skipBatch' call.
+ *
+   * Note that the current page could also be drained if the returned value = 'batchSize', i.e.,
+ * the remaining number of rows in the page is exactly equal to 'batchSize'. In this case, the
+ * next 'skipBatch' call will return 0 and the caller should call {@link Native#setPageV1} or
+ * {@link Native#setPageV2} next.
+ *
+ * @param handle the handle to the native Parquet column reader
+ * @param batchSize the number of rows to skip in the current page
+ * @param discard if true, discard read rows without padding nulls into the underlying vector
+ * @return the actual number of rows skipped
+ */
+ public static native int skipBatch(long handle, int batchSize, boolean discard);
+
+ /**
+ * Returns the current batch constructed via 'readBatch'
+ *
+ * @param handle the handle to the native Parquet column reader
+ * @return a long array with 2 elements, the first is the address to native Arrow array, and the
+ * second is the address to the Arrow schema.
+ */
+ public static native long[] currentBatch(long handle);
+
+ /** Set methods to set a constant value for the reader, so it'll return constant vectors */
+ public static native void setNull(long handle);
+
+ public static native void setBoolean(long handle, boolean value);
+
+ public static native void setByte(long handle, byte value);
+
+ public static native void setShort(long handle, short value);
+
+ public static native void setInt(long handle, int value);
+
+ public static native void setLong(long handle, long value);
+
+ public static native void setFloat(long handle, float value);
+
+ public static native void setDouble(long handle, double value);
+
+ public static native void setBinary(long handle, byte[] value);
+
+ /** Set decimal backed by FixedLengthByteArray */
+ public static native void setDecimal(long handle, byte[] value);
+
+ /** Set position of row index vector for Iceberg Metadata Column */
+ public static native void setPosition(long handle, long value, int size);
+
+ /** Set row index vector for Spark row index metadata column and return vector size */
+ public static native int setIndices(long handle, long offset, int size, long[] indices);
+
+ /** Set deleted info for Iceberg Metadata Column */
+ public static native void setIsDeleted(long handle, boolean[] isDeleted);
+
+ /**
+ * Closes the native Parquet column reader and releases all resources associated with it.
+ *
+ * @param handle the handle to the native Parquet column reader
+ */
+ public static native void closeColumnReader(long handle);
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/ReadOptions.java b/common/src/main/java/org/apache/comet/parquet/ReadOptions.java
new file mode 100644
index 000000000..6754443e6
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/ReadOptions.java
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.spark.SparkEnv;
+import org.apache.spark.launcher.SparkLauncher;
+
+/**
+ * Comet specific Parquet related read options.
+ *
+ *
+ * TODO: merge this with {@link org.apache.parquet.HadoopReadOptions} once PARQUET-2203 is done.
+ */
+public class ReadOptions {
+ private static final Logger LOG = LoggerFactory.getLogger(ReadOptions.class);
+ public static final String COMET_PARQUET_PARALLEL_IO_ENABLED =
+ "comet.parquet.read.parallel.io.enabled";
+ public static final boolean COMET_PARQUET_PARALLEL_IO_ENABLED_DEFAULT = true;
+
+ public static final String COMET_PARQUET_PARALLEL_IO_THREADS =
+ "comet.parquet.read.parallel.io.thread-pool.size";
+ public static final int COMET_PARQUET_PARALLEL_IO_THREADS_DEFAULT = 32;
+
+ public static final String COMET_IO_MERGE_RANGES = "comet.parquet.read.io.mergeRanges";
+ private static final boolean COMET_IO_MERGE_RANGES_DEFAULT = true;
+
+ public static final String COMET_IO_MERGE_RANGES_DELTA =
+ "comet.parquet.read.io.mergeRanges.delta";
+ private static final int COMET_IO_MERGE_RANGES_DELTA_DEFAULT = 1 << 23; // 8 MB
+
+ // Max number of concurrent tasks we expect. Used to autoconfigure S3 client connections
+ public static final int S3A_MAX_EXPECTED_PARALLELISM = 32;
+ // defined in hadoop-aws - org.apache.hadoop.fs.s3a.Constants.MAXIMUM_CONNECTIONS
+ public static final String S3A_MAXIMUM_CONNECTIONS = "fs.s3a.connection.maximum";
+ // default max connections in S3A - org.apache.hadoop.fs.s3a.Constants.DEFAULT_MAXIMUM_CONNECTIONS
+ public static final int S3A_DEFAULT_MAX_HTTP_CONNECTIONS = 96;
+
+ public static final String S3A_READAHEAD_RANGE = "fs.s3a.readahead.range";
+ // Default read ahead range in Hadoop is 64K; we increase it to 1 MB
+ public static final long COMET_DEFAULT_READAHEAD_RANGE = 1 * 1024 * 1024; // 1 MB
+
+ private final boolean parallelIOEnabled;
+ private final int parallelIOThreadPoolSize;
+ private final boolean ioMergeRanges;
+ private final int ioMergeRangesDelta;
+
+ ReadOptions(
+ boolean parallelIOEnabled,
+ int parallelIOThreadPoolSize,
+ boolean ioMergeRanges,
+ int ioMergeRangesDelta) {
+ this.parallelIOEnabled = parallelIOEnabled;
+ this.parallelIOThreadPoolSize = parallelIOThreadPoolSize;
+ this.ioMergeRanges = ioMergeRanges;
+ this.ioMergeRangesDelta = ioMergeRangesDelta;
+ }
+
+ public boolean isParallelIOEnabled() {
+ return this.parallelIOEnabled;
+ }
+
+ public int parallelIOThreadPoolSize() {
+ return this.parallelIOThreadPoolSize;
+ }
+
+ public boolean isIOMergeRangesEnabled() {
+ return ioMergeRanges;
+ }
+
+ public int getIOMergeRangesDelta() {
+ return ioMergeRangesDelta;
+ }
+
+ public static Builder builder(Configuration conf) {
+ return new Builder(conf);
+ }
+
+ public static Builder builder() {
+ return builder(new Configuration());
+ }
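+
+  // Typical usage (illustrative):
+  //   ReadOptions options =
+  //       ReadOptions.builder(conf).enableParallelIO(true).withParallelIOThreadPoolSize(16).build();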
+
+ public static class Builder {
+ private final Configuration conf;
+
+ private boolean parallelIOEnabled;
+ private int parallelIOThreadPoolSize;
+ private boolean ioMergeRanges;
+ private int ioMergeRangesDelta;
+
+ /**
+ * Whether to enable Parquet parallel IO when reading row groups. If true, Parquet reader will
+ * use multiple threads to read multiple chunks of data from the current row group in parallel.
+ */
+ public Builder enableParallelIO(boolean b) {
+ this.parallelIOEnabled = b;
+ return this;
+ }
+
+ /**
+ * Specify the number of threads to be used in parallel IO.
+ *
+     * Note: this will only be effective if parallel IO is enabled (e.g., via {@link
+ * #enableParallelIO(boolean)}).
+ */
+ public Builder withParallelIOThreadPoolSize(int numThreads) {
+ this.parallelIOThreadPoolSize = numThreads;
+ return this;
+ }
+
+ public Builder enableIOMergeRanges(boolean enableIOMergeRanges) {
+ this.ioMergeRanges = enableIOMergeRanges;
+ return this;
+ }
+
+ public Builder withIOMergeRangesDelta(int ioMergeRangesDelta) {
+ this.ioMergeRangesDelta = ioMergeRangesDelta;
+ return this;
+ }
+
+ public ReadOptions build() {
+ return new ReadOptions(
+ parallelIOEnabled, parallelIOThreadPoolSize, ioMergeRanges, ioMergeRangesDelta);
+ }
+
+ public Builder(Configuration conf) {
+ this.conf = conf;
+ this.parallelIOEnabled =
+ conf.getBoolean(
+ COMET_PARQUET_PARALLEL_IO_ENABLED, COMET_PARQUET_PARALLEL_IO_ENABLED_DEFAULT);
+ this.parallelIOThreadPoolSize =
+ conf.getInt(COMET_PARQUET_PARALLEL_IO_THREADS, COMET_PARQUET_PARALLEL_IO_THREADS_DEFAULT);
+ this.ioMergeRanges = conf.getBoolean(COMET_IO_MERGE_RANGES, COMET_IO_MERGE_RANGES_DEFAULT);
+ this.ioMergeRangesDelta =
+ conf.getInt(COMET_IO_MERGE_RANGES_DELTA, COMET_IO_MERGE_RANGES_DELTA_DEFAULT);
+ // override some S3 defaults
+ setS3Config();
+ }
+
+ // For paths to S3, if the s3 connection pool max is less than twice the product of
+ // parallel reader threads * number of cores, then increase the connection pool max
+ private void setS3Config() {
+ int s3ConnectionsMax = S3A_DEFAULT_MAX_HTTP_CONNECTIONS;
+ SparkEnv env = SparkEnv.get();
+ // Use a default number of cores in case we are using the FileReader outside the context
+ // of Spark.
+ int numExecutorCores = S3A_MAX_EXPECTED_PARALLELISM;
+ if (env != null) {
+ numExecutorCores = env.conf().getInt(SparkLauncher.EXECUTOR_CORES, numExecutorCores);
+ }
+ int parallelReaderThreads = this.parallelIOEnabled ? this.parallelIOThreadPoolSize : 1;
+ s3ConnectionsMax = Math.max(numExecutorCores * parallelReaderThreads * 2, s3ConnectionsMax);
+
+ setS3ConfIfGreater(conf, S3A_MAXIMUM_CONNECTIONS, s3ConnectionsMax);
+ setS3ConfIfGreater(conf, S3A_READAHEAD_RANGE, COMET_DEFAULT_READAHEAD_RANGE);
+ }
+
+ // Update the conf iff the new value is greater than the existing val
+ private void setS3ConfIfGreater(Configuration conf, String key, int newVal) {
+ int maxVal = newVal;
+ String curr = conf.get(key);
+ if (curr != null && !curr.isEmpty()) {
+ maxVal = Math.max(Integer.parseInt(curr), newVal);
+ }
+ LOG.info("File reader auto configured '{}={}'", key, maxVal);
+ conf.set(key, Integer.toString(maxVal));
+ }
+
+    // Update the conf iff the new value is greater than the existing value. This handles values
+    // that may carry well-known byte-size suffixes (K, M, G, T, P, E).
+ private void setS3ConfIfGreater(Configuration conf, String key, long newVal) {
+ long maxVal = conf.getLongBytes(key, newVal);
+ maxVal = Math.max(maxVal, newVal);
+ LOG.info("File reader auto configured '{}={}'", key, maxVal);
+ conf.set(key, Long.toString(maxVal));
+ }
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/RowGroupFilter.java b/common/src/main/java/org/apache/comet/parquet/RowGroupFilter.java
new file mode 100644
index 000000000..40a61c05e
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/RowGroupFilter.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.parquet.filter2.compat.FilterCompat;
+import org.apache.parquet.filter2.compat.FilterCompat.Filter;
+import org.apache.parquet.filter2.compat.FilterCompat.NoOpFilter;
+import org.apache.parquet.filter2.compat.FilterCompat.Visitor;
+import org.apache.parquet.filter2.dictionarylevel.DictionaryFilter;
+import org.apache.parquet.filter2.predicate.FilterPredicate;
+import org.apache.parquet.filter2.predicate.SchemaCompatibilityValidator;
+import org.apache.parquet.filter2.statisticslevel.StatisticsFilter;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.schema.MessageType;
+
+public class RowGroupFilter implements Visitor<List<BlockMetaData>> {
+  private final List<BlockMetaData> blocks;
+  private final MessageType schema;
+  private final List<FilterLevel> levels;
+ private final FileReader reader;
+
+ public enum FilterLevel {
+ STATISTICS,
+ DICTIONARY,
+ BLOOMFILTER
+ }
+
+  public static List<BlockMetaData> filterRowGroups(
+      List<FilterLevel> levels, Filter filter, List<BlockMetaData> blocks, FileReader reader) {
+ return filter.accept(new RowGroupFilter(levels, blocks, reader));
+ }
+
+  private RowGroupFilter(List<FilterLevel> levels, List<BlockMetaData> blocks, FileReader reader) {
+ this.levels = levels;
+ this.blocks = blocks;
+ this.reader = reader;
+ this.schema = reader.getFileMetaData().getSchema();
+ }
+
+ @Override
+  public List<BlockMetaData> visit(FilterCompat.FilterPredicateCompat filterPredicateCompat) {
+ FilterPredicate filterPredicate = filterPredicateCompat.getFilterPredicate();
+
+ // check that the schema of the filter matches the schema of the file
+ SchemaCompatibilityValidator.validate(filterPredicate, schema);
+
+    List<BlockMetaData> filteredBlocks = new ArrayList<>();
+
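+    // Apply the enabled filter levels in order (statistics, then dictionary, then bloom filter),
+    // stopping for a row group as soon as it can be dropped.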
+ for (BlockMetaData block : blocks) {
+ boolean drop = false;
+
+ if (levels.contains(FilterLevel.STATISTICS)) {
+ drop = StatisticsFilter.canDrop(filterPredicate, block.getColumns());
+ }
+
+ if (!drop && levels.contains(FilterLevel.DICTIONARY)) {
+ drop =
+ DictionaryFilter.canDrop(
+ filterPredicate,
+ block.getColumns(),
+ new DictionaryPageReader(
+ block,
+ reader.getFileMetaData().getFileDecryptor(),
+ reader.getInputStream(),
+ reader.getOptions()));
+ }
+
+ if (!drop && levels.contains(FilterLevel.BLOOMFILTER)) {
+ drop =
+ filterPredicate.accept(
+ new BloomFilterReader(
+ block, reader.getFileMetaData().getFileDecryptor(), reader.getInputStream()));
+ }
+
+ if (!drop) {
+ filteredBlocks.add(block);
+ }
+ }
+
+ return filteredBlocks;
+ }
+
+ @Override
+  public List<BlockMetaData> visit(
+ FilterCompat.UnboundRecordFilterCompat unboundRecordFilterCompat) {
+ return blocks;
+ }
+
+ @Override
+  public List<BlockMetaData> visit(NoOpFilter noOpFilter) {
+ return blocks;
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/RowGroupReader.java b/common/src/main/java/org/apache/comet/parquet/RowGroupReader.java
new file mode 100644
index 000000000..d5d73b078
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/RowGroupReader.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Optional;
+import java.util.PrimitiveIterator;
+
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.page.PageReadStore;
+import org.apache.parquet.column.page.PageReader;
+import org.apache.parquet.internal.filter2.columnindex.RowRanges;
+
+public class RowGroupReader implements PageReadStore {
+  private final Map<ColumnDescriptor, ColumnPageReader> readers = new HashMap<>();
+ private final long rowCount;
+ private final RowRanges rowRanges;
+
+ public RowGroupReader(long rowCount) {
+ this.rowCount = rowCount;
+ this.rowRanges = null;
+ }
+
+ RowGroupReader(RowRanges rowRanges) {
+ this.rowRanges = rowRanges;
+ this.rowCount = rowRanges.rowCount();
+ }
+
+ @Override
+ public long getRowCount() {
+ return rowCount;
+ }
+
+ @Override
+ public PageReader getPageReader(ColumnDescriptor path) {
+ final PageReader pageReader = readers.get(path);
+ if (pageReader == null) {
+ throw new IllegalArgumentException(
+ path + " is not found: " + readers.keySet() + " " + rowCount);
+ }
+ return pageReader;
+ }
+
+ @Override
+  public Optional<PrimitiveIterator.OfLong> getRowIndexes() {
+ return rowRanges == null ? Optional.empty() : Optional.of(rowRanges.iterator());
+ }
+
+ void addColumn(ColumnDescriptor path, ColumnPageReader reader) {
+ if (readers.put(path, reader) != null) {
+ throw new IllegalStateException(path + " was already added");
+ }
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/RowIndexColumnReader.java b/common/src/main/java/org/apache/comet/parquet/RowIndexColumnReader.java
new file mode 100644
index 000000000..8448318db
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/RowIndexColumnReader.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import org.apache.spark.sql.types.*;
+
+/**
+ * A column reader that returns the row index vector. Used for reading the row index metadata
+ * column in Spark 3.4+. The row index can be accessed via the {@code _tmp_metadata_row_index}
+ * column.
+ */
+public class RowIndexColumnReader extends MetadataColumnReader {
+ /** The row indices that are used to initialize this column reader. */
+ private final long[] indices;
+
+ /** The current number of indices to skip reading from {@code indices}. */
+ private long offset;
+
+ public RowIndexColumnReader(StructField field, int batchSize, long[] indices) {
+ super(field.dataType(), TypeUtil.convertToParquet(field), false);
+ this.indices = indices;
+ setBatchSize(batchSize);
+ }
+
+ @Override
+ public void readBatch(int total) {
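+    // Fill the native row-index vector for this batch; `offset` tracks how many indices have
+    // already been consumed by previous batches.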
+ Native.resetBatch(nativeHandle);
+ int count = Native.setIndices(nativeHandle, offset, total, indices);
+ offset += count;
+
+ super.readBatch(count);
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/SupportsComet.java b/common/src/main/java/org/apache/comet/parquet/SupportsComet.java
new file mode 100644
index 000000000..f330a7681
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/SupportsComet.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+public interface SupportsComet {
+ boolean isCometEnabled();
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/TypeUtil.java b/common/src/main/java/org/apache/comet/parquet/TypeUtil.java
new file mode 100644
index 000000000..340bf9823
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/TypeUtil.java
@@ -0,0 +1,279 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.util.Arrays;
+
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.*;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Type;
+import org.apache.parquet.schema.Types;
+import org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException;
+import org.apache.spark.sql.types.*;
+
+import org.apache.comet.CometConf;
+
+public class TypeUtil {
+
+ /** Converts the input Spark 'field' into a Parquet column descriptor. */
+ public static ColumnDescriptor convertToParquet(StructField field) {
+ Type.Repetition repetition;
+ int maxDefinitionLevel;
+ if (field.nullable()) {
+ repetition = Type.Repetition.OPTIONAL;
+ maxDefinitionLevel = 1;
+ } else {
+ repetition = Type.Repetition.REQUIRED;
+ maxDefinitionLevel = 0;
+ }
+ String[] path = new String[] {field.name()};
+
+ DataType type = field.dataType();
+
+    Types.PrimitiveBuilder<PrimitiveType> builder = null;
+ // Only partition column can be `NullType`, which also uses `ConstantColumnReader`. Here we
+ // piggy-back onto Parquet boolean type for constant vector of null values, we don't really
+ // care what Parquet type it is.
+ if (type == DataTypes.BooleanType || type == DataTypes.NullType) {
+ builder = Types.primitive(PrimitiveType.PrimitiveTypeName.BOOLEAN, repetition);
+ } else if (type == DataTypes.IntegerType || type instanceof YearMonthIntervalType) {
+ builder = Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition);
+ } else if (type == DataTypes.DateType) {
+ builder =
+ Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
+ .as(LogicalTypeAnnotation.dateType());
+ } else if (type == DataTypes.ByteType) {
+ builder =
+ Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
+ .as(LogicalTypeAnnotation.intType(8, true));
+ } else if (type == DataTypes.ShortType) {
+ builder =
+ Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
+ .as(LogicalTypeAnnotation.intType(16, true));
+ } else if (type == DataTypes.LongType) {
+ builder = Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, repetition);
+ } else if (type == DataTypes.BinaryType) {
+ builder = Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, repetition);
+ } else if (type == DataTypes.StringType) {
+ builder =
+ Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, repetition)
+ .as(LogicalTypeAnnotation.stringType());
+ } else if (type == DataTypes.FloatType) {
+ builder = Types.primitive(PrimitiveType.PrimitiveTypeName.FLOAT, repetition);
+ } else if (type == DataTypes.DoubleType) {
+ builder = Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE, repetition);
+ } else if (type == DataTypes.TimestampType) {
+ builder =
+ Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, repetition)
+ .as(LogicalTypeAnnotation.timestampType(true, TimeUnit.MICROS));
+ } else if (type == TimestampNTZType$.MODULE$) {
+ builder =
+ Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, repetition)
+ .as(LogicalTypeAnnotation.timestampType(false, TimeUnit.MICROS));
+ } else if (type instanceof DecimalType) {
+ DecimalType decimalType = (DecimalType) type;
+ builder =
+ Types.primitive(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, repetition)
+ .length(16) // always store as Decimal128
+ .as(LogicalTypeAnnotation.decimalType(decimalType.scale(), decimalType.precision()));
+ }
+ if (builder == null) {
+ throw new UnsupportedOperationException("Unsupported input Spark type: " + type);
+ }
+
+ return new ColumnDescriptor(path, builder.named(field.name()), 0, maxDefinitionLevel);
+ }
+
+ /**
+   * Checks whether the Parquet 'descriptor' and the Spark read type 'sparkType' are compatible,
+   * and throws an exception if they are not.
+ *
+ * This mostly follows the logic in Spark's
+ * ParquetVectorUpdaterFactory#getUpdater(ColumnDescriptor, DataType)
+ *
+ * @param descriptor descriptor for a Parquet primitive column
+ * @param sparkType Spark read type
+ */
+ public static void checkParquetType(ColumnDescriptor descriptor, DataType sparkType) {
+ PrimitiveType.PrimitiveTypeName typeName = descriptor.getPrimitiveType().getPrimitiveTypeName();
+ LogicalTypeAnnotation logicalTypeAnnotation =
+ descriptor.getPrimitiveType().getLogicalTypeAnnotation();
+ boolean allowTypePromotion = (boolean) CometConf.COMET_SCHEMA_EVOLUTION_ENABLED().get();
+
+ if (sparkType instanceof NullType) {
+ return;
+ }
+
+ switch (typeName) {
+ case BOOLEAN:
+ if (sparkType == DataTypes.BooleanType) return;
+ break;
+ case INT32:
+ if (sparkType == DataTypes.IntegerType || canReadAsIntDecimal(descriptor, sparkType)) {
+ return;
+ } else if (sparkType == DataTypes.LongType
+ && isUnsignedIntTypeMatched(logicalTypeAnnotation, 32)) {
+ // In `ParquetToSparkSchemaConverter`, we map parquet UINT32 to our LongType.
+          // For unsigned int32, values are stored as plain signed int32 in Parquet when the
+          // dictionary encoding falls back. We read them as long values.
+ return;
+ } else if (sparkType == DataTypes.LongType && allowTypePromotion) {
+ // In Comet we allow schema evolution from int to long, if
+ // `spark.comet.schemaEvolution.enabled` is enabled.
+ return;
+ } else if (sparkType == DataTypes.ByteType || sparkType == DataTypes.ShortType) {
+ return;
+ } else if (sparkType == DataTypes.DateType) {
+ // TODO: use dateTimeRebaseMode from Spark side
+ return;
+ } else if (sparkType instanceof YearMonthIntervalType) {
+ return;
+ }
+ break;
+ case INT64:
+ if (sparkType == DataTypes.LongType || canReadAsLongDecimal(descriptor, sparkType)) {
+ return;
+ } else if (isLongDecimal(sparkType)
+ && isUnsignedIntTypeMatched(logicalTypeAnnotation, 64)) {
+ // In `ParquetToSparkSchemaConverter`, we map parquet UINT64 to our Decimal(20, 0).
+          // For unsigned int64, values are stored as plain signed int64 in Parquet when the
+          // dictionary encoding falls back. We read them as decimal values.
+ return;
+ } else if (isTimestampTypeMatched(logicalTypeAnnotation, TimeUnit.MICROS)) {
+ validateTimestampType(logicalTypeAnnotation, sparkType);
+ // TODO: use dateTimeRebaseMode from Spark side
+ return;
+ } else if (isTimestampTypeMatched(logicalTypeAnnotation, TimeUnit.MILLIS)) {
+ validateTimestampType(logicalTypeAnnotation, sparkType);
+ return;
+ }
+ break;
+ case INT96:
+ if (sparkType == TimestampNTZType$.MODULE$) {
+ convertErrorForTimestampNTZ(typeName.name());
+ } else if (sparkType == DataTypes.TimestampType) {
+ return;
+ }
+ break;
+ case FLOAT:
+ if (sparkType == DataTypes.FloatType) return;
+ // In Comet we allow schema evolution from float to double, if
+ // `spark.comet.schemaEvolution.enabled` is enabled.
+ if (sparkType == DataTypes.DoubleType && allowTypePromotion) return;
+ break;
+ case DOUBLE:
+ if (sparkType == DataTypes.DoubleType) return;
+ break;
+ case BINARY:
+ if (sparkType == DataTypes.StringType
+ || sparkType == DataTypes.BinaryType
+ || canReadAsBinaryDecimal(descriptor, sparkType)) {
+ return;
+ }
+ break;
+ case FIXED_LEN_BYTE_ARRAY:
+ if (canReadAsIntDecimal(descriptor, sparkType)
+ || canReadAsLongDecimal(descriptor, sparkType)
+ || canReadAsBinaryDecimal(descriptor, sparkType)
+ || sparkType == DataTypes.BinaryType
+ // for uuid, since iceberg maps uuid to StringType
+ || sparkType == DataTypes.StringType) {
+ return;
+ }
+ break;
+ default:
+ break;
+ }
+
+ throw new SchemaColumnConvertNotSupportedException(
+ Arrays.toString(descriptor.getPath()),
+ descriptor.getPrimitiveType().getPrimitiveTypeName().toString(),
+ sparkType.catalogString());
+ }
+
+ private static void validateTimestampType(
+ LogicalTypeAnnotation logicalTypeAnnotation, DataType sparkType) {
+ assert (logicalTypeAnnotation instanceof TimestampLogicalTypeAnnotation);
+ // Throw an exception if the Parquet type is TimestampLTZ and the Catalyst type is TimestampNTZ.
+ // This is to avoid mistakes in reading the timestamp values.
+ if (((TimestampLogicalTypeAnnotation) logicalTypeAnnotation).isAdjustedToUTC()
+ && sparkType == TimestampNTZType$.MODULE$) {
+ convertErrorForTimestampNTZ("int64 time(" + logicalTypeAnnotation + ")");
+ }
+ }
+
+ private static void convertErrorForTimestampNTZ(String parquetType) {
+ throw new RuntimeException(
+ "Unable to create Parquet converter for data type "
+ + TimestampNTZType$.MODULE$.json()
+ + " whose Parquet type is "
+ + parquetType);
+ }
+
+ private static boolean canReadAsIntDecimal(ColumnDescriptor descriptor, DataType dt) {
+ if (!DecimalType.is32BitDecimalType(dt)) return false;
+ return isDecimalTypeMatched(descriptor, dt);
+ }
+
+ private static boolean canReadAsLongDecimal(ColumnDescriptor descriptor, DataType dt) {
+ if (!DecimalType.is64BitDecimalType(dt)) return false;
+ return isDecimalTypeMatched(descriptor, dt);
+ }
+
+ private static boolean canReadAsBinaryDecimal(ColumnDescriptor descriptor, DataType dt) {
+ if (!DecimalType.isByteArrayDecimalType(dt)) return false;
+ return isDecimalTypeMatched(descriptor, dt);
+ }
+
+ private static boolean isLongDecimal(DataType dt) {
+ if (dt instanceof DecimalType) {
+ DecimalType d = (DecimalType) dt;
+ return d.precision() == 20 && d.scale() == 0;
+ }
+ return false;
+ }
+
+ private static boolean isDecimalTypeMatched(ColumnDescriptor descriptor, DataType dt) {
+ DecimalType d = (DecimalType) dt;
+ LogicalTypeAnnotation typeAnnotation = descriptor.getPrimitiveType().getLogicalTypeAnnotation();
+ if (typeAnnotation instanceof DecimalLogicalTypeAnnotation) {
+ DecimalLogicalTypeAnnotation decimalType = (DecimalLogicalTypeAnnotation) typeAnnotation;
+ // It's OK if the required decimal precision is larger than or equal to the physical decimal
+ // precision in the Parquet metadata, as long as the decimal scale is the same.
+ return decimalType.getPrecision() <= d.precision() && decimalType.getScale() == d.scale();
+ }
+ return false;
+ }
+
+ private static boolean isTimestampTypeMatched(
+ LogicalTypeAnnotation logicalTypeAnnotation, LogicalTypeAnnotation.TimeUnit unit) {
+ return logicalTypeAnnotation instanceof TimestampLogicalTypeAnnotation
+ && ((TimestampLogicalTypeAnnotation) logicalTypeAnnotation).getUnit() == unit;
+ }
+
+ private static boolean isUnsignedIntTypeMatched(
+ LogicalTypeAnnotation logicalTypeAnnotation, int bitWidth) {
+ return logicalTypeAnnotation instanceof IntLogicalTypeAnnotation
+ && !((IntLogicalTypeAnnotation) logicalTypeAnnotation).isSigned()
+ && ((IntLogicalTypeAnnotation) logicalTypeAnnotation).getBitWidth() == bitWidth;
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/parquet/Utils.java b/common/src/main/java/org/apache/comet/parquet/Utils.java
new file mode 100644
index 000000000..95ca06cda
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/parquet/Utils.java
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.spark.sql.types.*;
+
+public class Utils {
+ public static ColumnReader getColumnReader(
+ DataType type,
+ ColumnDescriptor descriptor,
+ int batchSize,
+ boolean useDecimal128,
+ boolean useLazyMaterialization) {
+ // TODO: support `useLegacyDateTimestamp` for Iceberg
+ return getColumnReader(
+ type, descriptor, batchSize, useDecimal128, useLazyMaterialization, true);
+ }
+
+ public static ColumnReader getColumnReader(
+ DataType type,
+ ColumnDescriptor descriptor,
+ int batchSize,
+ boolean useDecimal128,
+ boolean useLazyMaterialization,
+ boolean useLegacyDateTimestamp) {
+ if (useLazyMaterialization && supportLazyMaterialization(type)) {
+ return new LazyColumnReader(
+ type, descriptor, batchSize, useDecimal128, useLegacyDateTimestamp);
+ } else {
+ return new ColumnReader(type, descriptor, batchSize, useDecimal128, useLegacyDateTimestamp);
+ }
+ }
+
+ private static boolean supportLazyMaterialization(DataType type) {
+ return (type instanceof StringType || type instanceof BinaryType);
+ }
+
+ /**
+ * Initialize the Comet native Parquet reader.
+ *
+ * @param descriptor the Parquet column descriptor for the column to be read
+ * @param readType the Spark read type used for type promotion. Null if promotion is not enabled.
+ * @param batchSize the batch size, i.e., maximum number of elements per record batch
+ * @param useDecimal128 whether to always represent decimals using 128 bits. If false, the native
+ * reader may represent decimals using 32 or 64 bits, depending on the precision.
+   * @param useLegacyDateTimestampOrNTZ whether to read dates/timestamps that were written in the
+   *     legacy hybrid Julian + Gregorian calendar as is. If false, exceptions are thrown instead.
+   *     If the Spark type is TimestampNTZ, this should be true.
+ */
+ public static long initColumnReader(
+ ColumnDescriptor descriptor,
+ DataType readType,
+ int batchSize,
+ boolean useDecimal128,
+ boolean useLegacyDateTimestampOrNTZ) {
+ PrimitiveType primitiveType = descriptor.getPrimitiveType();
+ int primitiveTypeId = getPhysicalTypeId(primitiveType.getPrimitiveTypeName());
+ LogicalTypeAnnotation annotation = primitiveType.getLogicalTypeAnnotation();
+
+ // Process logical type information
+
+ int bitWidth = -1;
+ boolean isSigned = false;
+ if (annotation instanceof LogicalTypeAnnotation.IntLogicalTypeAnnotation) {
+ LogicalTypeAnnotation.IntLogicalTypeAnnotation intAnnotation =
+ (LogicalTypeAnnotation.IntLogicalTypeAnnotation) annotation;
+ bitWidth = intAnnotation.getBitWidth();
+ isSigned = intAnnotation.isSigned();
+ }
+
+ int precision, scale;
+ precision = scale = -1;
+ if (annotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) {
+ LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalAnnotation =
+ (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) annotation;
+ precision = decimalAnnotation.getPrecision();
+ scale = decimalAnnotation.getScale();
+ }
+
+ int tu = -1;
+ boolean isAdjustedUtc = false;
+ if (annotation instanceof LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) {
+ LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampAnnotation =
+ (LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) annotation;
+ tu = getTimeUnitId(timestampAnnotation.getUnit());
+ isAdjustedUtc = timestampAnnotation.isAdjustedToUTC();
+ }
+
+ TypePromotionInfo promotionInfo;
+ if (readType != null) {
+ promotionInfo = new TypePromotionInfo(readType);
+ } else {
+      // If type promotion is not enabled, we'll just use the Parquet primitive type and precision.
+ promotionInfo = new TypePromotionInfo(primitiveTypeId, precision);
+ }
+
+ return Native.initColumnReader(
+ primitiveTypeId,
+ getLogicalTypeId(annotation),
+ promotionInfo.physicalTypeId,
+ descriptor.getPath(),
+ descriptor.getMaxDefinitionLevel(),
+ descriptor.getMaxRepetitionLevel(),
+ bitWidth,
+ isSigned,
+ primitiveType.getTypeLength(),
+ precision,
+ promotionInfo.precision,
+ scale,
+ tu,
+ isAdjustedUtc,
+ batchSize,
+ useDecimal128,
+ useLegacyDateTimestampOrNTZ);
+ }
+
+ static class TypePromotionInfo {
+ // The Parquet physical type ID converted from the Spark read schema, or the original Parquet
+ // physical type ID if type promotion is not enabled.
+ int physicalTypeId;
+ // Decimal precision from the Spark read schema, or -1 if it's not decimal type.
+ int precision;
+
+ TypePromotionInfo(int physicalTypeId, int precision) {
+ this.physicalTypeId = physicalTypeId;
+ this.precision = precision;
+ }
+
+ TypePromotionInfo(DataType sparkReadType) {
+ // Create a dummy `StructField` from the input Spark type. We don't care about
+ // field name, nullability and metadata.
+ StructField f = new StructField("f", sparkReadType, false, Metadata.empty());
+ ColumnDescriptor descriptor = TypeUtil.convertToParquet(f);
+ PrimitiveType primitiveType = descriptor.getPrimitiveType();
+ int physicalTypeId = getPhysicalTypeId(primitiveType.getPrimitiveTypeName());
+ LogicalTypeAnnotation annotation = primitiveType.getLogicalTypeAnnotation();
+ int precision = -1;
+ if (annotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) {
+ LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalAnnotation =
+ (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) annotation;
+ precision = decimalAnnotation.getPrecision();
+ }
+ this.physicalTypeId = physicalTypeId;
+ this.precision = precision;
+ }
+ }
+
+ /**
+ * Maps the input Parquet physical type 'typeName' to an integer representing it. This is used for
+ * serialization between the Java and native side.
+ *
+ * @param typeName enum for the Parquet physical type
+ * @return an integer representing the input physical type
+ */
+ static int getPhysicalTypeId(PrimitiveType.PrimitiveTypeName typeName) {
+ switch (typeName) {
+ case BOOLEAN:
+ return 0;
+ case INT32:
+ return 1;
+ case INT64:
+ return 2;
+ case INT96:
+ return 3;
+ case FLOAT:
+ return 4;
+ case DOUBLE:
+ return 5;
+ case BINARY:
+ return 6;
+ case FIXED_LEN_BYTE_ARRAY:
+ return 7;
+ }
+ throw new IllegalArgumentException("Invalid Parquet physical type: " + typeName);
+ }
+
+ /**
+ * Maps the input Parquet logical type 'annotation' to an integer representing it. This is used
+ * for serialization between the Java and native side.
+ *
+ * @param annotation the Parquet logical type annotation
+ * @return an integer representing the input logical type
+ */
+ static int getLogicalTypeId(LogicalTypeAnnotation annotation) {
+ if (annotation == null) {
+ return -1; // No logical type associated
+ } else if (annotation instanceof LogicalTypeAnnotation.IntLogicalTypeAnnotation) {
+ return 0;
+ } else if (annotation instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation) {
+ return 1;
+ } else if (annotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) {
+ return 2;
+ } else if (annotation instanceof LogicalTypeAnnotation.DateLogicalTypeAnnotation) {
+ return 3;
+ } else if (annotation instanceof LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) {
+ return 4;
+ } else if (annotation instanceof LogicalTypeAnnotation.EnumLogicalTypeAnnotation) {
+ return 5;
+ } else if (annotation instanceof LogicalTypeAnnotation.UUIDLogicalTypeAnnotation) {
+ return 6;
+ }
+
+ throw new UnsupportedOperationException("Unsupported Parquet logical type " + annotation);
+ }
+
+ static int getTimeUnitId(LogicalTypeAnnotation.TimeUnit tu) {
+ switch (tu) {
+ case MILLIS:
+ return 0;
+ case MICROS:
+ return 1;
+ case NANOS:
+ return 2;
+ default:
+ throw new UnsupportedOperationException("Unsupported TimeUnit " + tu);
+ }
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/vector/CometDecodedVector.java b/common/src/main/java/org/apache/comet/vector/CometDecodedVector.java
new file mode 100644
index 000000000..fec266bf1
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/vector/CometDecodedVector.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.vector;
+
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.spark.sql.comet.util.Utils;
+import org.apache.spark.unsafe.Platform;
+
+/** A Comet vector whose elements are already decoded (i.e., materialized). */
+public abstract class CometDecodedVector extends CometVector {
+ /**
+ * The vector that stores all the values. For dictionary-backed vector, this is the vector of
+ * indices.
+ */
+ protected final ValueVector valueVector;
+
+ private boolean hasNull;
+ private int numNulls;
+ private int numValues;
+ private int validityByteCacheIndex = -1;
+ private byte validityByteCache;
+
+ protected CometDecodedVector(ValueVector vector, Field valueField, boolean useDecimal128) {
+ super(Utils.fromArrowField(valueField), useDecimal128);
+ this.valueVector = vector;
+ this.numNulls = valueVector.getNullCount();
+ this.numValues = valueVector.getValueCount();
+ this.hasNull = numNulls != 0;
+ }
+
+ @Override
+ ValueVector getValueVector() {
+ return valueVector;
+ }
+
+ @Override
+ public void setNumNulls(int numNulls) {
+ // We don't need to update null count in 'valueVector' since 'ValueVector.getNullCount' will
+ // re-compute the null count from validity buffer.
+ this.numNulls = numNulls;
+ this.hasNull = numNulls != 0;
+ this.validityByteCacheIndex = -1;
+ }
+
+ @Override
+ public void setNumValues(int numValues) {
+ this.numValues = numValues;
+ if (valueVector instanceof BaseVariableWidthVector) {
+ BaseVariableWidthVector bv = (BaseVariableWidthVector) valueVector;
+ // In case `lastSet` is smaller than `numValues`, `setValueCount` will set all the offsets
+ // within `[lastSet + 1, numValues)` to be empty, which is incorrect in our case.
+ //
+ // For instance, this can happen if one first call `setNumValues` with input 100, and then
+ // again `setNumValues` with 200. The first call will set `lastSet` to 99, while the second
+ // call will set all strings between indices `[100, 200)` to be empty.
+ bv.setLastSet(numValues);
+ }
+ valueVector.setValueCount(numValues);
+ }
+
+ public int numValues() {
+ return numValues;
+ }
+
+ @Override
+ public boolean hasNull() {
+ return hasNull;
+ }
+
+ @Override
+ public int numNulls() {
+ return numNulls;
+ }
+
+ @Override
+ public boolean isNullAt(int rowId) {
+ if (!hasNull) return false;
+
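+    // Cache the validity byte covering this row so that consecutive lookups of nearby row IDs
+    // avoid re-reading native memory.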
+ int byteIndex = rowId >> 3;
+ if (byteIndex != validityByteCacheIndex) {
+ long validityBufferAddress = valueVector.getValidityBuffer().memoryAddress();
+ validityByteCache = Platform.getByte(null, validityBufferAddress + byteIndex);
+ validityByteCacheIndex = byteIndex;
+ }
+ return ((validityByteCache >> (rowId & 7)) & 1) == 0;
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/vector/CometDelegateVector.java b/common/src/main/java/org/apache/comet/vector/CometDelegateVector.java
new file mode 100644
index 000000000..165af5631
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/vector/CometDelegateVector.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.vector;
+
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.Decimal;
+import org.apache.spark.sql.vectorized.ColumnVector;
+import org.apache.spark.sql.vectorized.ColumnarArray;
+import org.apache.spark.sql.vectorized.ColumnarMap;
+import org.apache.spark.unsafe.types.UTF8String;
+
+/** A special Comet vector that just delegates all calls */
+public class CometDelegateVector extends CometVector {
+ protected CometVector delegate;
+
+ public CometDelegateVector(DataType dataType) {
+ this(dataType, null, false);
+ }
+
+ public CometDelegateVector(DataType dataType, boolean useDecimal128) {
+ this(dataType, null, useDecimal128);
+ }
+
+ public CometDelegateVector(DataType dataType, CometVector delegate, boolean useDecimal128) {
+ super(dataType, useDecimal128);
+ if (delegate instanceof CometDelegateVector) {
+ throw new IllegalArgumentException("cannot have nested delegation");
+ }
+ this.delegate = delegate;
+ }
+
+ public void setDelegate(CometVector delegate) {
+ this.delegate = delegate;
+ }
+
+ @Override
+ public void setNumNulls(int numNulls) {
+ delegate.setNumNulls(numNulls);
+ }
+
+ @Override
+ public void setNumValues(int numValues) {
+ delegate.setNumValues(numValues);
+ }
+
+ @Override
+ public int numValues() {
+ return delegate.numValues();
+ }
+
+ @Override
+ public boolean hasNull() {
+ return delegate.hasNull();
+ }
+
+ @Override
+ public int numNulls() {
+ return delegate.numNulls();
+ }
+
+ @Override
+ public boolean isNullAt(int rowId) {
+ return delegate.isNullAt(rowId);
+ }
+
+ @Override
+ public boolean getBoolean(int rowId) {
+ return delegate.getBoolean(rowId);
+ }
+
+ @Override
+ public byte getByte(int rowId) {
+ return delegate.getByte(rowId);
+ }
+
+ @Override
+ public short getShort(int rowId) {
+ return delegate.getShort(rowId);
+ }
+
+ @Override
+ public int getInt(int rowId) {
+ return delegate.getInt(rowId);
+ }
+
+ @Override
+ public long getLong(int rowId) {
+ return delegate.getLong(rowId);
+ }
+
+ @Override
+ public float getFloat(int rowId) {
+ return delegate.getFloat(rowId);
+ }
+
+ @Override
+ public double getDouble(int rowId) {
+ return delegate.getDouble(rowId);
+ }
+
+ @Override
+ public Decimal getDecimal(int i, int precision, int scale) {
+ return delegate.getDecimal(i, precision, scale);
+ }
+
+ @Override
+ byte[] getBinaryDecimal(int i) {
+ return delegate.getBinaryDecimal(i);
+ }
+
+ @Override
+ public UTF8String getUTF8String(int rowId) {
+ return delegate.getUTF8String(rowId);
+ }
+
+ @Override
+ public byte[] getBinary(int rowId) {
+ return delegate.getBinary(rowId);
+ }
+
+ @Override
+ public ColumnarArray getArray(int i) {
+ return delegate.getArray(i);
+ }
+
+ @Override
+ public ColumnarMap getMap(int i) {
+ return delegate.getMap(i);
+ }
+
+ @Override
+ public ColumnVector getChild(int i) {
+ return delegate.getChild(i);
+ }
+
+ @Override
+ ValueVector getValueVector() {
+ return delegate.getValueVector();
+ }
+
+ @Override
+ public CometVector slice(int offset, int length) {
+ return delegate.slice(offset, length);
+ }
+
+ @Override
+ DictionaryProvider getDictionaryProvider() {
+ return delegate.getDictionaryProvider();
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/vector/CometDictionary.java b/common/src/main/java/org/apache/comet/vector/CometDictionary.java
new file mode 100644
index 000000000..8c7046585
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/vector/CometDictionary.java
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.vector;
+
+import org.apache.arrow.vector.ValueVector;
+import org.apache.spark.unsafe.types.UTF8String;
+
+/** A dictionary which maps indices (integers) to values. */
+public class CometDictionary implements AutoCloseable {
+ private static final int DECIMAL_BYTE_WIDTH = 16;
+
+ private final CometPlainVector values;
+ private final int numValues;
+
+ /** Decoded dictionary values. Only one of the following is set. */
+ private byte[] bytes;
+
+ private short[] shorts;
+ private int[] ints;
+ private long[] longs;
+ private float[] floats;
+ private double[] doubles;
+ private boolean[] booleans;
+ private ByteArrayWrapper[] binaries;
+ private UTF8String[] strings;
+
+ public CometDictionary(CometPlainVector values) {
+ this.values = values;
+ this.numValues = values.numValues();
+ initialize();
+ }
+
+ public ValueVector getValueVector() {
+ return values.getValueVector();
+ }
+
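+ // The decodeTo* accessors below simply index into the primitive arrays that initialize()
+ // eagerly populated from the dictionary's Arrow vector.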
+ public boolean decodeToBoolean(int index) {
+ return booleans[index];
+ }
+
+ public byte decodeToByte(int index) {
+ return bytes[index];
+ }
+
+ public short decodeToShort(int index) {
+ return shorts[index];
+ }
+
+ public int decodeToInt(int index) {
+ return ints[index];
+ }
+
+ public long decodeToLong(int index) {
+ return longs[index];
+ }
+
+ public float decodeToFloat(int index) {
+ return floats[index];
+ }
+
+ public double decodeToDouble(int index) {
+ return doubles[index];
+ }
+
+ public byte[] decodeToBinary(int index) {
+ return binaries[index].bytes;
+ }
+
+ public UTF8String decodeToUTF8String(int index) {
+ return strings[index];
+ }
+
+ @Override
+ public void close() {
+ values.close();
+ }
+
+ private void initialize() {
+ switch (values.getValueVector().getMinorType()) {
+ case BIT:
+ booleans = new boolean[numValues];
+ for (int i = 0; i < numValues; i++) {
+ booleans[i] = values.getBoolean(i);
+ }
+ break;
+ case TINYINT:
+ bytes = new byte[numValues];
+ for (int i = 0; i < numValues; i++) {
+ bytes[i] = values.getByte(i);
+ }
+ break;
+ case SMALLINT:
+ shorts = new short[numValues];
+ for (int i = 0; i < numValues; i++) {
+ shorts[i] = values.getShort(i);
+ }
+ break;
+ case INT:
+ case DATEDAY:
+ ints = new int[numValues];
+ for (int i = 0; i < numValues; i++) {
+ ints[i] = values.getInt(i);
+ }
+ break;
+ case BIGINT:
+ case TIMESTAMPMICRO:
+ case TIMESTAMPMICROTZ:
+ longs = new long[numValues];
+ for (int i = 0; i < numValues; i++) {
+ longs[i] = values.getLong(i);
+ }
+ break;
+ case FLOAT4:
+ floats = new float[numValues];
+ for (int i = 0; i < numValues; i++) {
+ floats[i] = values.getFloat(i);
+ }
+ break;
+ case FLOAT8:
+ doubles = new double[numValues];
+ for (int i = 0; i < numValues; i++) {
+ doubles[i] = values.getDouble(i);
+ }
+ break;
+ case VARBINARY:
+ case FIXEDSIZEBINARY:
+ binaries = new ByteArrayWrapper[numValues];
+ for (int i = 0; i < numValues; i++) {
+ binaries[i] = new ByteArrayWrapper(values.getBinary(i));
+ }
+ break;
+ case VARCHAR:
+ strings = new UTF8String[numValues];
+ for (int i = 0; i < numValues; i++) {
+ strings[i] = values.getUTF8String(i);
+ }
+ break;
+ case DECIMAL:
+ binaries = new ByteArrayWrapper[numValues];
+ for (int i = 0; i < numValues; i++) {
+ // Need to copy here since the backing byte array is re-used across decimal reads
+ byte[] bytes = values.getBinaryDecimal(i);
+ byte[] copy = new byte[DECIMAL_BYTE_WIDTH];
+ System.arraycopy(bytes, 0, copy, 0, DECIMAL_BYTE_WIDTH);
+ binaries[i] = new ByteArrayWrapper(copy);
+ }
+ break;
+ default:
+ throw new IllegalArgumentException(
+ "Invalid Arrow minor type: " + values.getValueVector().getMinorType());
+ }
+ }
+
+ private static class ByteArrayWrapper {
+ private final byte[] bytes;
+
+ ByteArrayWrapper(byte[] bytes) {
+ this.bytes = bytes;
+ }
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/vector/CometDictionaryVector.java b/common/src/main/java/org/apache/comet/vector/CometDictionaryVector.java
new file mode 100644
index 000000000..225fcfc43
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/vector/CometDictionaryVector.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.vector;
+
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.util.TransferPair;
+import org.apache.parquet.Preconditions;
+import org.apache.spark.unsafe.types.UTF8String;
+
+/** A column vector whose elements are dictionary-encoded. */
+public class CometDictionaryVector extends CometDecodedVector {
+ public final CometPlainVector indices;
+ public final CometDictionary values;
+ public final DictionaryProvider provider;
+
+ /** Whether this vector is an alias sliced from another vector. */
+ private final boolean isAlias;
+
+ public CometDictionaryVector(
+ CometPlainVector indices,
+ CometDictionary values,
+ DictionaryProvider provider,
+ boolean useDecimal128) {
+ this(indices, values, provider, useDecimal128, false);
+ }
+
+ public CometDictionaryVector(
+ CometPlainVector indices,
+ CometDictionary values,
+ DictionaryProvider provider,
+ boolean useDecimal128,
+ boolean isAlias) {
+ super(indices.valueVector, values.getValueVector().getField(), useDecimal128);
+ Preconditions.checkArgument(
+ indices.valueVector instanceof IntVector, "'indices' should be an IntVector");
+ this.values = values;
+ this.indices = indices;
+ this.provider = provider;
+ this.isAlias = isAlias;
+ }
+
+ @Override
+ DictionaryProvider getDictionaryProvider() {
+ return this.provider;
+ }
+
+ @Override
+ public void close() {
+ super.close();
+ // Only close the values vector if this is not a sliced vector.
+ if (!isAlias) {
+ values.close();
+ }
+ }
+
+ @Override
+ public boolean getBoolean(int i) {
+ return values.decodeToBoolean(indices.getInt(i));
+ }
+
+ @Override
+ public byte getByte(int i) {
+ return values.decodeToByte(indices.getInt(i));
+ }
+
+ @Override
+ public short getShort(int i) {
+ return values.decodeToShort(indices.getInt(i));
+ }
+
+ @Override
+ public int getInt(int i) {
+ return values.decodeToInt(indices.getInt(i));
+ }
+
+ @Override
+ public long getLong(int i) {
+ return values.decodeToLong(indices.getInt(i));
+ }
+
+ @Override
+ public float getFloat(int i) {
+ return values.decodeToFloat(indices.getInt(i));
+ }
+
+ @Override
+ public double getDouble(int i) {
+ return values.decodeToDouble(indices.getInt(i));
+ }
+
+ @Override
+ public UTF8String getUTF8String(int i) {
+ return values.decodeToUTF8String(indices.getInt(i));
+ }
+
+ @Override
+ public byte[] getBinary(int i) {
+ return values.decodeToBinary(indices.getInt(i));
+ }
+
+ @Override
+ byte[] getBinaryDecimal(int i) {
+ return values.decodeToBinary(indices.getInt(i));
+ }
+
+ @Override
+ public CometVector slice(int offset, int length) {
+ TransferPair tp = indices.valueVector.getTransferPair(indices.valueVector.getAllocator());
+ tp.splitAndTransfer(offset, length);
+ CometPlainVector sliced = new CometPlainVector(tp.getTo(), useDecimal128);
+
+ // Set the alias flag to true so that the sliced vector will not close the dictionary vector.
+ // Otherwise, if the dictionary is closed, the sliced vector will not be able to access the
+ // dictionary.
+ return new CometDictionaryVector(sliced, values, provider, useDecimal128, true);
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/vector/CometLazyVector.java b/common/src/main/java/org/apache/comet/vector/CometLazyVector.java
new file mode 100644
index 000000000..17b8d7e71
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/vector/CometLazyVector.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.vector;
+
+import org.apache.arrow.vector.ValueVector;
+import org.apache.spark.sql.types.DataType;
+
+import org.apache.comet.parquet.LazyColumnReader;
+
+public class CometLazyVector extends CometDelegateVector {
+ private final LazyColumnReader columnReader;
+
+ public CometLazyVector(DataType type, LazyColumnReader columnReader, boolean useDecimal128) {
+ super(type, useDecimal128);
+ this.columnReader = columnReader;
+ }
+
+ public CometDecodedVector getDecodedVector() {
+ return (CometDecodedVector) delegate;
+ }
+
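+ // Whole-vector accesses cannot be answered lazily: they force the remaining rows of the batch
+ // to be read and then reload the decoded vector before delegating.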
+ @Override
+ public ValueVector getValueVector() {
+ columnReader.readAllBatch();
+ setDelegate(columnReader.loadVector());
+ return super.getValueVector();
+ }
+
+ @Override
+ public void setNumNulls(int numNulls) {
+ throw new UnsupportedOperationException("CometLazyVector doesn't support 'setNumNulls'");
+ }
+
+ @Override
+ public void setNumValues(int numValues) {
+ throw new UnsupportedOperationException("CometLazyVector doesn't support 'setNumValues'");
+ }
+
+ @Override
+ public void close() {
+ // Do nothing. 'vector' is closed by 'columnReader' which owns it.
+ }
+
+ @Override
+ public boolean hasNull() {
+ columnReader.readAllBatch();
+ setDelegate(columnReader.loadVector());
+ return super.hasNull();
+ }
+
+ @Override
+ public int numNulls() {
+ columnReader.readAllBatch();
+ setDelegate(columnReader.loadVector());
+ return super.numNulls();
+ }
+
+ @Override
+ public boolean isNullAt(int rowId) {
+ if (columnReader.materializeUpToIfNecessary(rowId)) {
+ setDelegate(columnReader.loadVector());
+ }
+ return super.isNullAt(rowId);
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/vector/CometListVector.java b/common/src/main/java/org/apache/comet/vector/CometListVector.java
new file mode 100644
index 000000000..1c8f3e658
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/vector/CometListVector.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.vector;
+
+import org.apache.arrow.vector.*;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.util.TransferPair;
+import org.apache.spark.sql.vectorized.ColumnVector;
+import org.apache.spark.sql.vectorized.ColumnarArray;
+
+/** A Comet column vector for list type. */
+public class CometListVector extends CometDecodedVector {
+ final ListVector listVector;
+ final ValueVector dataVector;
+ final ColumnVector dataColumnVector;
+
+ public CometListVector(ValueVector vector, boolean useDecimal128) {
+ super(vector, vector.getField(), useDecimal128);
+
+ this.listVector = ((ListVector) vector);
+ this.dataVector = listVector.getDataVector();
+ this.dataColumnVector = getVector(dataVector, useDecimal128);
+ }
+
+ @Override
+ public ColumnarArray getArray(int i) {
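+ // Arrow list offsets are 4-byte ints; entries i and i + 1 bound the child values of row i.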
+ int start = listVector.getOffsetBuffer().getInt(i * ListVector.OFFSET_WIDTH);
+ int end = listVector.getOffsetBuffer().getInt((i + 1) * ListVector.OFFSET_WIDTH);
+
+ return new ColumnarArray(dataColumnVector, start, end - start);
+ }
+
+ @Override
+ public CometVector slice(int offset, int length) {
+ TransferPair tp = this.valueVector.getTransferPair(this.valueVector.getAllocator());
+ tp.splitAndTransfer(offset, length);
+
+ return new CometListVector(tp.getTo(), useDecimal128);
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/vector/CometPlainVector.java b/common/src/main/java/org/apache/comet/vector/CometPlainVector.java
new file mode 100644
index 000000000..a7373590a
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/vector/CometPlainVector.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.vector;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.UUID;
+
+import org.apache.arrow.c.CDataDictionaryProvider;
+import org.apache.arrow.vector.*;
+import org.apache.arrow.vector.util.TransferPair;
+import org.apache.parquet.Preconditions;
+import org.apache.spark.unsafe.Platform;
+import org.apache.spark.unsafe.types.UTF8String;
+
+/** A column vector whose elements are plainly decoded. */
+public class CometPlainVector extends CometDecodedVector {
+ private final long valueBufferAddress;
+ private final boolean isBaseFixedWidthVector;
+
+ private byte booleanByteCache;
+ private int booleanByteCacheIndex = -1;
+
+ public CometPlainVector(ValueVector vector, boolean useDecimal128) {
+ super(vector, vector.getField(), useDecimal128);
+ // NullType doesn't have a data buffer.
+ if (vector instanceof NullVector) {
+ this.valueBufferAddress = -1;
+ } else {
+ this.valueBufferAddress = vector.getDataBuffer().memoryAddress();
+ }
+
+ isBaseFixedWidthVector = valueVector instanceof BaseFixedWidthVector;
+ }
+
+ @Override
+ public void setNumNulls(int numNulls) {
+ super.setNumNulls(numNulls);
+ this.booleanByteCacheIndex = -1;
+ }
+
+ @Override
+ public boolean getBoolean(int rowId) {
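+ // Booleans are bit-packed, so cache the byte that holds the bits for rows [byteIndex * 8, byteIndex * 8 + 8).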
+ int byteIndex = rowId >> 3;
+ if (byteIndex != booleanByteCacheIndex) {
+ booleanByteCache = getByte(byteIndex);
+ booleanByteCacheIndex = byteIndex;
+ }
+ return ((booleanByteCache >> (rowId & 7)) & 1) == 1;
+ }
+
+ @Override
+ public byte getByte(int rowId) {
+ return Platform.getByte(null, valueBufferAddress + rowId);
+ }
+
+ @Override
+ public short getShort(int rowId) {
+ return Platform.getShort(null, valueBufferAddress + rowId * 2L);
+ }
+
+ @Override
+ public int getInt(int rowId) {
+ return Platform.getInt(null, valueBufferAddress + rowId * 4L);
+ }
+
+ @Override
+ public long getLong(int rowId) {
+ return Platform.getLong(null, valueBufferAddress + rowId * 8L);
+ }
+
+ @Override
+ public float getFloat(int rowId) {
+ return Platform.getFloat(null, valueBufferAddress + rowId * 4L);
+ }
+
+ @Override
+ public double getDouble(int rowId) {
+ return Platform.getDouble(null, valueBufferAddress + rowId * 8L);
+ }
+
+ @Override
+ public UTF8String getUTF8String(int rowId) {
+ if (!isBaseFixedWidthVector) {
+ BaseVariableWidthVector varWidthVector = (BaseVariableWidthVector) valueVector;
+ long offsetBufferAddress = varWidthVector.getOffsetBuffer().memoryAddress();
+ int offset = Platform.getInt(null, offsetBufferAddress + rowId * 4L);
+ int length = Platform.getInt(null, offsetBufferAddress + (rowId + 1L) * 4L) - offset;
+ return UTF8String.fromAddress(null, valueBufferAddress + offset, length);
+ } else {
+ // Iceberg maps UUID to StringType.
+ // The data type here must be UUID because the only FLBA -> String mapping we have is UUID.
+ BaseFixedWidthVector fixedWidthVector = (BaseFixedWidthVector) valueVector;
+ int length = fixedWidthVector.getTypeWidth();
+ int offset = rowId * length;
+ byte[] result = new byte[length];
+ Platform.copyMemory(
+ null, valueBufferAddress + offset, result, Platform.BYTE_ARRAY_OFFSET, length);
+ return UTF8String.fromString(convertToUuid(result).toString());
+ }
+ }
+
+ @Override
+ public byte[] getBinary(int rowId) {
+ int offset;
+ int length;
+ if (valueVector instanceof BaseVariableWidthVector) {
+ BaseVariableWidthVector varWidthVector = (BaseVariableWidthVector) valueVector;
+ long offsetBufferAddress = varWidthVector.getOffsetBuffer().memoryAddress();
+ offset = Platform.getInt(null, offsetBufferAddress + rowId * 4L);
+ length = Platform.getInt(null, offsetBufferAddress + (rowId + 1L) * 4L) - offset;
+ } else if (valueVector instanceof BaseFixedWidthVector) {
+ BaseFixedWidthVector fixedWidthVector = (BaseFixedWidthVector) valueVector;
+ length = fixedWidthVector.getTypeWidth();
+ offset = rowId * length;
+ } else {
+ throw new RuntimeException("Unsupported binary vector type: " + valueVector.getName());
+ }
+ byte[] result = new byte[length];
+ Platform.copyMemory(
+ null, valueBufferAddress + offset, result, Platform.BYTE_ARRAY_OFFSET, length);
+ return result;
+ }
+
+ @Override
+ CDataDictionaryProvider getDictionaryProvider() {
+ return null;
+ }
+
+ @Override
+ public boolean isNullAt(int rowId) {
+ if (this.valueBufferAddress == -1) {
+ return true;
+ } else {
+ return super.isNullAt(rowId);
+ }
+ }
+
+ @Override
+ public CometVector slice(int offset, int length) {
+ TransferPair tp = this.valueVector.getTransferPair(this.valueVector.getAllocator());
+ tp.splitAndTransfer(offset, length);
+
+ return new CometPlainVector(tp.getTo(), useDecimal128);
+ }
+
+ private static UUID convertToUuid(byte[] buf) {
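+ // The 16-byte value stores the UUID as two big-endian longs: most significant bits first.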
+ Preconditions.checkArgument(buf.length == 16, "UUID requires 16 bytes");
+ ByteBuffer bb = ByteBuffer.wrap(buf);
+ bb.order(ByteOrder.BIG_ENDIAN);
+ long mostSigBits = bb.getLong();
+ long leastSigBits = bb.getLong();
+ return new UUID(mostSigBits, leastSigBits);
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/vector/CometStructVector.java b/common/src/main/java/org/apache/comet/vector/CometStructVector.java
new file mode 100644
index 000000000..52dcd4326
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/vector/CometStructVector.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.vector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.arrow.vector.*;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.util.TransferPair;
+import org.apache.spark.sql.vectorized.ColumnVector;
+
+/** A Comet column vector for struct type. */
+public class CometStructVector extends CometDecodedVector {
+ final List children;
+
+ public CometStructVector(ValueVector vector, boolean useDecimal128) {
+ super(vector, vector.getField(), useDecimal128);
+
+ StructVector structVector = ((StructVector) vector);
+
+ int size = structVector.size();
+ List children = new ArrayList<>();
+
+ for (int i = 0; i < size; ++i) {
+ ValueVector value = structVector.getVectorById(i);
+ children.add(getVector(value, useDecimal128));
+ }
+ this.children = children;
+ }
+
+ @Override
+ public ColumnVector getChild(int i) {
+ return children.get(i);
+ }
+
+ @Override
+ public CometVector slice(int offset, int length) {
+ TransferPair tp = this.valueVector.getTransferPair(this.valueVector.getAllocator());
+ tp.splitAndTransfer(offset, length);
+
+ return new CometStructVector(tp.getTo(), useDecimal128);
+ }
+}
diff --git a/common/src/main/java/org/apache/comet/vector/CometVector.java b/common/src/main/java/org/apache/comet/vector/CometVector.java
new file mode 100644
index 000000000..744f24395
--- /dev/null
+++ b/common/src/main/java/org/apache/comet/vector/CometVector.java
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.vector;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.Arrays;
+
+import org.apache.arrow.vector.FixedWidthVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.Decimal;
+import org.apache.spark.sql.types.IntegerType;
+import org.apache.spark.sql.vectorized.ColumnVector;
+import org.apache.spark.sql.vectorized.ColumnarArray;
+import org.apache.spark.sql.vectorized.ColumnarMap;
+import org.apache.spark.unsafe.Platform;
+import org.apache.spark.unsafe.types.UTF8String;
+
+/** Base class for all Comet column vector implementations. */
+public abstract class CometVector extends ColumnVector {
+ private static final int DECIMAL_BYTE_WIDTH = 16;
+ private final byte[] DECIMAL_BYTES = new byte[DECIMAL_BYTE_WIDTH];
+ protected final boolean useDecimal128;
+
+ protected CometVector(DataType type, boolean useDecimal128) {
+ super(type);
+ this.useDecimal128 = useDecimal128;
+ }
+
+ /**
+ * Sets the number of nulls in this vector to be 'numNulls'. This is used when the vector is
+ * reused across batches.
+ */
+ public abstract void setNumNulls(int numNulls);
+
+ /**
+ * Sets the number of values (including both nulls and non-nulls) in this vector to be
+ * 'numValues'. This is used when the vector is reused across batches.
+ */
+ public abstract void setNumValues(int numValues);
+
+ /** Returns the number of values in this vector. */
+ public abstract int numValues();
+
+ /** Whether the elements of this vector are of fixed length. */
+ public boolean isFixedLength() {
+ return getValueVector() instanceof FixedWidthVector;
+ }
+
+ @Override
+ public Decimal getDecimal(int i, int precision, int scale) {
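+ // Decimals with small enough precision are decoded directly from the underlying int or long;
+ // otherwise (or when useDecimal128 is set) the 16-byte big-endian binary representation is used.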
+ if (!useDecimal128 && precision <= Decimal.MAX_INT_DIGITS() && type instanceof IntegerType) {
+ return Decimal.createUnsafe(getInt(i), precision, scale);
+ } else if (!useDecimal128 && precision <= Decimal.MAX_LONG_DIGITS()) {
+ return Decimal.createUnsafe(getLong(i), precision, scale);
+ } else {
+ byte[] bytes = getBinaryDecimal(i);
+ BigInteger bigInteger = new BigInteger(bytes);
+ BigDecimal javaDecimal = new BigDecimal(bigInteger, scale);
+ try {
+ return Decimal.apply(javaDecimal, precision, scale);
+ } catch (ArithmeticException e) {
+ throw new ArithmeticException(
+ "Cannot convert "
+ + javaDecimal
+ + " (bytes: "
+ + Arrays.toString(bytes)
+ + ", integer: "
+ + bigInteger
+ + ") to decimal with precision: "
+ + precision
+ + " and scale: "
+ + scale);
+ }
+ }
+ }
+
+ /** Reads a 16-byte array encoded in big-endian order for decimal128; the returned buffer is reused across calls. */
+ byte[] getBinaryDecimal(int i) {
+ long valueBufferAddress = getValueVector().getDataBuffer().memoryAddress();
+ Platform.copyMemory(
+ null,
+ valueBufferAddress + (long) i * DECIMAL_BYTE_WIDTH,
+ DECIMAL_BYTES,
+ Platform.BYTE_ARRAY_OFFSET,
+ DECIMAL_BYTE_WIDTH);
+ // Decimal is stored little-endian in Arrow, so we need to reverse the bytes here
+ for (int j = 0, k = DECIMAL_BYTE_WIDTH - 1; j < DECIMAL_BYTE_WIDTH / 2; j++, k--) {
+ byte tmp = DECIMAL_BYTES[j];
+ DECIMAL_BYTES[j] = DECIMAL_BYTES[k];
+ DECIMAL_BYTES[k] = tmp;
+ }
+ return DECIMAL_BYTES;
+ }
+
+ @Override
+ public boolean getBoolean(int rowId) {
+ throw new UnsupportedOperationException("Not yet supported");
+ }
+
+ @Override
+ public byte getByte(int rowId) {
+ throw new UnsupportedOperationException("Not yet supported");
+ }
+
+ @Override
+ public short getShort(int rowId) {
+ throw new UnsupportedOperationException("Not yet supported");
+ }
+
+ @Override
+ public int getInt(int rowId) {
+ throw new UnsupportedOperationException("Not yet supported");
+ }
+
+ @Override
+ public long getLong(int rowId) {
+ throw new UnsupportedOperationException("Not yet supported");
+ }
+
+ @Override
+ public float getFloat(int rowId) {
+ throw new UnsupportedOperationException("Not yet supported");
+ }
+
+ @Override
+ public double getDouble(int rowId) {
+ throw new UnsupportedOperationException("Not yet supported");
+ }
+
+ @Override
+ public UTF8String getUTF8String(int rowId) {
+ throw new UnsupportedOperationException("Not yet supported");
+ }
+
+ @Override
+ public byte[] getBinary(int rowId) {
+ throw new UnsupportedOperationException("Not yet supported");
+ }
+
+ @Override
+ public ColumnarArray getArray(int i) {
+ throw new UnsupportedOperationException("Not yet supported");
+ }
+
+ @Override
+ public ColumnarMap getMap(int i) {
+ throw new UnsupportedOperationException("Not yet supported");
+ }
+
+ @Override
+ public ColumnVector getChild(int i) {
+ throw new UnsupportedOperationException("Not yet supported");
+ }
+
+ @Override
+ public void close() {
+ getValueVector().close();
+ }
+
+ DictionaryProvider getDictionaryProvider() {
+ throw new UnsupportedOperationException("Not implemented");
+ }
+
+ abstract ValueVector getValueVector();
+
+ /**
+ * Returns a new zero-copy vector that contains the values from [offset, offset + length).
+ *
+ * @param offset the start offset in this vector
+ * @param length the number of values in the new vector
+ * @return the new vector
+ */
+ public abstract CometVector slice(int offset, int length);
+
+ /**
+ * Returns a corresponding `CometVector` implementation based on the given Arrow `ValueVector`.
+ *
+ * @param vector Arrow `ValueVector`
+ * @param useDecimal128 Whether to use Decimal128 for decimal columns
+ * @param dictionaryProvider the dictionary provider used to look up dictionary values; may be
+ * null if the vector is not dictionary-encoded
+ * @return `CometVector` implementation
+ */
+ protected static CometVector getVector(
+ ValueVector vector, boolean useDecimal128, DictionaryProvider dictionaryProvider) {
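+ // Struct and list vectors wrap their children recursively; other vectors are returned as plain
+ // vectors, or wrapped as dictionary vectors when the Arrow field carries a dictionary encoding.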
+ if (vector instanceof StructVector) {
+ return new CometStructVector(vector, useDecimal128);
+ } else if (vector instanceof ListVector) {
+ return new CometListVector(vector, useDecimal128);
+ } else {
+ DictionaryEncoding dictionaryEncoding = vector.getField().getDictionary();
+ CometPlainVector cometVector = new CometPlainVector(vector, useDecimal128);
+
+ if (dictionaryEncoding == null) {
+ return cometVector;
+ } else {
+ Dictionary dictionary = dictionaryProvider.lookup(dictionaryEncoding.getId());
+ CometPlainVector dictionaryVector =
+ new CometPlainVector(dictionary.getVector(), useDecimal128);
+ CometDictionary cometDictionary = new CometDictionary(dictionaryVector);
+
+ return new CometDictionaryVector(
+ cometVector, cometDictionary, dictionaryProvider, useDecimal128);
+ }
+ }
+ }
+
+ protected static CometVector getVector(ValueVector vector, boolean useDecimal128) {
+ return getVector(vector, useDecimal128, null);
+ }
+}
diff --git a/common/src/main/resources/log4j2.properties b/common/src/main/resources/log4j2.properties
new file mode 100644
index 000000000..04cdf8533
--- /dev/null
+++ b/common/src/main/resources/log4j2.properties
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Set everything to be logged to the file target/unit-tests.log
+rootLogger.level = info
+rootLogger.appenderRef.file.ref = ${sys:test.appender:-File}
+
+appender.file.type = File
+appender.file.name = File
+appender.file.fileName = target/unit-tests.log
+appender.file.layout.type = PatternLayout
+appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
+
+# Tests that launch java subprocesses can set the "test.appender" system property to
+# "console" to avoid having the child process's logs overwrite the unit test's
+# log file.
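+# For example, launching the subprocess with -Dtest.appender=console routes its logs to stderr.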
+appender.console.type = Console
+appender.console.name = console
+appender.console.target = SYSTEM_ERR
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %t: %m%n
+
+# Ignore messages below warning level from Jetty, because it's a bit verbose
+logger.jetty.name = org.sparkproject.jetty
+logger.jetty.level = warn
+
diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala b/common/src/main/scala/org/apache/comet/CometConf.scala
new file mode 100644
index 000000000..7f83d9296
--- /dev/null
+++ b/common/src/main/scala/org/apache/comet/CometConf.scala
@@ -0,0 +1,454 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet
+
+import java.util.concurrent.TimeUnit
+
+import org.apache.spark.network.util.ByteUnit
+import org.apache.spark.network.util.JavaUtils
+import org.apache.spark.sql.comet.util.Utils
+import org.apache.spark.sql.internal.SQLConf
+
+/**
+ * Configurations for a Comet application. Mostly inspired by [[SQLConf]] in Spark.
+ *
+ * To get the value of a Comet config key from a [[SQLConf]], you can do the following:
+ *
+ * {{{
+ * CometConf.COMET_ENABLED.get
+ * }}}
+ *
+ * which retrieves the config value from the thread-local [[SQLConf]] object. Alternatively, you
+ * can also explicitly pass a [[SQLConf]] object to the `get` method.
+ */
+object CometConf {
+ def conf(key: String): ConfigBuilder = ConfigBuilder(key)
+
+ val COMET_EXEC_CONFIG_PREFIX = "spark.comet.exec";
+
+ val COMET_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.enabled")
+ .doc(
+ "Whether to enable Comet extension for Spark. When this is turned on, Spark will use " +
+ "Comet to read Parquet data source. Note that to enable native vectorized execution, " +
+ "both this config and 'spark.comet.exec.enabled' need to be enabled. By default, this " +
+ "config is the value of the env var `ENABLE_COMET` if set, or true otherwise.")
+ .booleanConf
+ .createWithDefault(sys.env.getOrElse("ENABLE_COMET", "true").toBoolean)
+
+ val COMET_SCAN_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.scan.enabled")
+ .doc(
+ "Whether to enable Comet scan. When this is turned on, Spark will use Comet to read " +
+ "Parquet data source. Note that to enable native vectorized execution, both this " +
+ "config and 'spark.comet.exec.enabled' need to be enabled. By default, this config " +
+ "is true.")
+ .booleanConf
+ .createWithDefault(true)
+
+ val COMET_EXEC_ENABLED: ConfigEntry[Boolean] = conf(s"$COMET_EXEC_CONFIG_PREFIX.enabled")
+ .doc(
+ "Whether to enable Comet native vectorized execution for Spark. This controls whether " +
+ "Spark should convert operators into their Comet counterparts and execute them in " +
+ "native space. Note: each operator is associated with a separate config in the " +
+ "format of 'spark.comet.exec..enabled' at the moment, and both the " +
+ "config and this need to be turned on, in order for the operator to be executed in " +
+ "native. By default, this config is false.")
+ .booleanConf
+ .createWithDefault(false)
+
+ val COMET_MEMORY_OVERHEAD: OptionalConfigEntry[Long] = conf("spark.comet.memoryOverhead")
+ .doc(
+ "The amount of additional memory to be allocated per executor process for Comet, in MiB. " +
+ "This config is optional. If this is not specified, it will be set to " +
+ "`spark.comet.memory.overhead.factor` * `spark.executor.memory`. " +
+ "This is memory that accounts for things like Comet native execution, etc.")
+ .bytesConf(ByteUnit.MiB)
+ .createOptional
+
+ val COMET_MEMORY_OVERHEAD_FACTOR: ConfigEntry[Double] = conf(
+ "spark.comet.memory.overhead.factor")
+ .doc(
+ "Fraction of executor memory to be allocated as additional non-heap memory per executor " +
+ "process for Comet. Default value is 0.2.")
+ .doubleConf
+ .checkValue(
+ factor => factor > 0,
+ "Ensure that Comet memory overhead factor is a double greater than 0")
+ .createWithDefault(0.2)
+
+ val COMET_MEMORY_OVERHEAD_MIN_MIB: ConfigEntry[Long] = conf("spark.comet.memory.overhead.min")
+ .doc("Minimum amount of additional memory to be allocated per executor process for Comet, " +
+ "in MiB.")
+ .bytesConf(ByteUnit.MiB)
+ .checkValue(
+ _ >= 0,
+ "Ensure that Comet memory overhead min is a long greater than or equal to 0")
+ .createWithDefault(384)
+
+ val COMET_EXEC_ALL_OPERATOR_ENABLED: ConfigEntry[Boolean] = conf(
+ s"$COMET_EXEC_CONFIG_PREFIX.all.enabled")
+ .doc(
+ "Whether to enable all Comet operators. By default, this config is false. Note that " +
+ "this config precedes all separate config 'spark.comet.exec..enabled'. " +
+ "That being said, if this config is enabled, separate configs are ignored.")
+ .booleanConf
+ .createWithDefault(false)
+
+ val COMET_EXEC_ALL_EXPR_ENABLED: ConfigEntry[Boolean] =
+ conf(s"$COMET_EXEC_CONFIG_PREFIX.all.expr.enabled")
+ .doc(
+ "Whether to enable all Comet exprs. By default, this config is false. Note that " +
+ "this config precedes all separate config 'spark.comet.exec..enabled'. " +
+ "That being said, if this config is enabled, separate configs are ignored.")
+ .booleanConf
+ .createWithDefault(false)
+
+ val COMET_DEBUG_ENABLED: ConfigEntry[Boolean] =
+ conf("spark.comet.debug.enabled")
+ .doc(
+ "Whether to enable debug mode for Comet. By default, this config is false. " +
+ "When enabled, Comet will do additional checks for debugging purpose. For example, " +
+ "validating array when importing arrays from JVM at native side. Note that these " +
+ "checks may be expensive in performance and should only be enabled for debugging " +
+ "purpose.")
+ .booleanConf
+ .createWithDefault(false)
+
+ val COMET_BATCH_SIZE: ConfigEntry[Int] = conf("spark.comet.batchSize")
+ .doc("The columnar batch size, i.e., the maximum number of rows that a batch can contain.")
+ .intConf
+ .createWithDefault(8192)
+
+ val COMET_EXEC_MEMORY_FRACTION: ConfigEntry[Double] = conf("spark.comet.exec.memoryFraction")
+ .doc(
+ "The fraction of memory from Comet memory overhead that the native memory " +
+ "manager can use for execution. The purpose of this config is to set aside memory for " +
+ "untracked data structures, as well as imprecise size estimation during memory " +
+ "acquisition. Default value is 0.7.")
+ .doubleConf
+ .createWithDefault(0.7)
+
+ val COMET_PARQUET_ENABLE_DIRECT_BUFFER: ConfigEntry[Boolean] = conf(
+ "spark.comet.parquet.enable.directBuffer")
+ .doc("Whether to use Java direct byte buffer when reading Parquet. By default, this is false")
+ .booleanConf
+ .createWithDefault(false)
+
+ val COMET_SCAN_PREFETCH_ENABLED: ConfigEntry[Boolean] =
+ conf("spark.comet.scan.preFetch.enabled")
+ .doc("Whether to enable pre-fetching feature of CometScan. By default is disabled.")
+ .booleanConf
+ .createWithDefault(false)
+
+ val COMET_SCAN_PREFETCH_THREAD_NUM: ConfigEntry[Int] =
+ conf("spark.comet.scan.preFetch.threadNum")
+ .doc(
+ "The number of threads running pre-fetching for CometScan. Effective if " +
+ s"${COMET_SCAN_PREFETCH_ENABLED.key} is enabled. By default it is 2. Note that more " +
+ "pre-fetching threads means more memory requirement to store pre-fetched row groups.")
+ .intConf
+ .createWithDefault(2)
+
+ val COMET_NATIVE_LOAD_REQUIRED: ConfigEntry[Boolean] = conf("spark.comet.nativeLoadRequired")
+ .doc(
+ "Whether to require Comet native library to load successfully when Comet is enabled. " +
+ "If not, Comet will silently fallback to Spark when it fails to load the native lib. " +
+ "Otherwise, an error will be thrown and the Spark job will be aborted.")
+ .booleanConf
+ .createWithDefault(false)
+
+ val COMET_EXCEPTION_ON_LEGACY_DATE_TIMESTAMP: ConfigEntry[Boolean] =
+ conf("spark.comet.exceptionOnDatetimeRebase")
+ .doc("Whether to throw exception when seeing dates/timestamps from the legacy hybrid " +
+ "(Julian + Gregorian) calendar. Since Spark 3, dates/timestamps were written according " +
+ "to the Proleptic Gregorian calendar. When this is true, Comet will " +
+ "throw exceptions when seeing these dates/timestamps that were written by Spark version " +
+ "before 3.0. If this is false, these dates/timestamps will be read as if they were " +
+ "written to the Proleptic Gregorian calendar and will not be rebased.")
+ .booleanConf
+ .createWithDefault(false)
+
+ val COMET_USE_DECIMAL_128: ConfigEntry[Boolean] = conf("spark.comet.use.decimal128")
+ .internal()
+ .doc("If true, Comet will always use 128 bits to represent a decimal value, regardless of " +
+ "its precision. If false, Comet will use 32, 64 and 128 bits respectively depending on " +
+ "the precision. N.B. this is NOT a user-facing config but should be inferred and set by " +
+ "Comet itself.")
+ .booleanConf
+ .createWithDefault(false)
+
+ val COMET_USE_LAZY_MATERIALIZATION: ConfigEntry[Boolean] = conf(
+ "spark.comet.use.lazyMaterialization")
+ .internal()
+ .doc(
+ "Whether to enable lazy materialization for Comet. When this is turned on, Comet will " +
+ "read Parquet data source lazily for string and binary columns. For filter operations, " +
+ "lazy materialization will improve read performance by skipping unused pages.")
+ .booleanConf
+ .createWithDefault(true)
+
+ val COMET_SCHEMA_EVOLUTION_ENABLED: ConfigEntry[Boolean] = conf(
+ "spark.comet.schemaEvolution.enabled")
+ .internal()
+ .doc(
+ "Whether to enable schema evolution in Comet. For instance, promoting a integer " +
+ "column to a long column, a float column to a double column, etc. This is automatically" +
+ "enabled when reading from Iceberg tables.")
+ .booleanConf
+ .createWithDefault(false)
+}
+
+object ConfigHelpers {
+ def toNumber[T](s: String, converter: String => T, key: String, configType: String): T = {
+ try {
+ converter(s.trim)
+ } catch {
+ case _: NumberFormatException =>
+ throw new IllegalArgumentException(s"$key should be $configType, but was $s")
+ }
+ }
+
+ def toBoolean(s: String, key: String): Boolean = {
+ try {
+ s.trim.toBoolean
+ } catch {
+ case _: IllegalArgumentException =>
+ throw new IllegalArgumentException(s"$key should be boolean, but was $s")
+ }
+ }
+
+ def stringToSeq[T](str: String, converter: String => T): Seq[T] = {
+ Utils.stringToSeq(str).map(converter)
+ }
+
+ def seqToString[T](v: Seq[T], stringConverter: T => String): String = {
+ v.map(stringConverter).mkString(",")
+ }
+
+ def timeFromString(str: String, unit: TimeUnit): Long = JavaUtils.timeStringAs(str, unit)
+
+ def timeToString(v: Long, unit: TimeUnit): String =
+ TimeUnit.MILLISECONDS.convert(v, unit) + "ms"
+
+ def byteFromString(str: String, unit: ByteUnit): Long = {
+ val (input, multiplier) =
+ if (str.nonEmpty && str.charAt(0) == '-') {
+ (str.substring(1), -1)
+ } else {
+ (str, 1)
+ }
+ multiplier * JavaUtils.byteStringAs(input, unit)
+ }
+
+ def byteToString(v: Long, unit: ByteUnit): String = unit.convertTo(v, ByteUnit.BYTE) + "b"
+}
+
+private class TypedConfigBuilder[T](
+ val parent: ConfigBuilder,
+ val converter: String => T,
+ val stringConverter: T => String) {
+
+ import ConfigHelpers._
+
+ def this(parent: ConfigBuilder, converter: String => T) = {
+ this(parent, converter, Option(_).map(_.toString).orNull)
+ }
+
+ /** Apply a transformation to the user-provided values of the config entry. */
+ def transform(fn: T => T): TypedConfigBuilder[T] = {
+ new TypedConfigBuilder(parent, s => fn(converter(s)), stringConverter)
+ }
+
+ /** Checks if the user-provided value for the config matches the validator. */
+ def checkValue(validator: T => Boolean, errorMsg: String): TypedConfigBuilder[T] = {
+ transform { v =>
+ if (!validator(v)) {
+ throw new IllegalArgumentException(s"'$v' in ${parent.key} is invalid. $errorMsg")
+ }
+ v
+ }
+ }
+
+ /** Check that user-provided values for the config match a pre-defined set. */
+ def checkValues(validValues: Set[T]): TypedConfigBuilder[T] = {
+ transform { v =>
+ if (!validValues.contains(v)) {
+ throw new IllegalArgumentException(
+ s"The value of ${parent.key} should be one of ${validValues.mkString(", ")}, but was $v")
+ }
+ v
+ }
+ }
+
+ /** Turns the config entry into a sequence of values of the underlying type. */
+ def toSequence: TypedConfigBuilder[Seq[T]] = {
+ new TypedConfigBuilder(parent, stringToSeq(_, converter), seqToString(_, stringConverter))
+ }
+
+ /** Creates a [[ConfigEntry]] that does not have a default value. */
+ def createOptional: OptionalConfigEntry[T] = {
+ new OptionalConfigEntry[T](
+ parent.key,
+ converter,
+ stringConverter,
+ parent._doc,
+ parent._public,
+ parent._version)
+ }
+
+ /** Creates a [[ConfigEntry]] that has a default value. */
+ def createWithDefault(default: T): ConfigEntry[T] = {
+ val transformedDefault = converter(stringConverter(default))
+ new ConfigEntryWithDefault[T](
+ parent.key,
+ transformedDefault,
+ converter,
+ stringConverter,
+ parent._doc,
+ parent._public,
+ parent._version)
+ }
+}
+
+private[comet] abstract class ConfigEntry[T](
+ val key: String,
+ val valueConverter: String => T,
+ val stringConverter: T => String,
+ val doc: String,
+ val isPublic: Boolean,
+ val version: String) {
+
+ /**
+ * Retrieves the config value from the given [[SQLConf]].
+ */
+ def get(conf: SQLConf): T
+
+ /**
+ * Retrieves the config value from the current thread-local [[SQLConf]].
+ * @return the config value
+ */
+ def get(): T = get(SQLConf.get)
+
+ def defaultValue: Option[T] = None
+ def defaultValueString: String
+
+ override def toString: String = {
+ s"ConfigEntry(key=$key, defaultValue=$defaultValueString, doc=$doc, " +
+ s"public=$isPublic, version=$version)"
+ }
+}
+
+private[comet] class ConfigEntryWithDefault[T](
+ key: String,
+ _defaultValue: T,
+ valueConverter: String => T,
+ stringConverter: T => String,
+ doc: String,
+ isPublic: Boolean,
+ version: String)
+ extends ConfigEntry(key, valueConverter, stringConverter, doc, isPublic, version) {
+ override def defaultValue: Option[T] = Some(_defaultValue)
+ override def defaultValueString: String = stringConverter(_defaultValue)
+
+ def get(conf: SQLConf): T = {
+ val tmp = conf.getConfString(key, null)
+ if (tmp == null) {
+ _defaultValue
+ } else {
+ valueConverter(tmp)
+ }
+ }
+}
+
+private[comet] class OptionalConfigEntry[T](
+ key: String,
+ val rawValueConverter: String => T,
+ val rawStringConverter: T => String,
+ doc: String,
+ isPublic: Boolean,
+ version: String)
+ extends ConfigEntry[Option[T]](
+ key,
+ s => Some(rawValueConverter(s)),
+ v => v.map(rawStringConverter).orNull,
+ doc,
+ isPublic,
+ version) {
+
+ override def defaultValueString: String = ConfigEntry.UNDEFINED
+
+ override def get(conf: SQLConf): Option[T] = {
+ Option(conf.getConfString(key, null)).map(rawValueConverter)
+ }
+}
+
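+/**
+ * Entry point for declaring a config. A typical definition (illustrative only, not an actual
+ * Comet config) chains the builders defined here, e.g.:
+ * conf("spark.comet.foo.enabled").doc("...").booleanConf.createWithDefault(false)
+ */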
+private[comet] case class ConfigBuilder(key: String) {
+ import ConfigHelpers._
+
+ var _public = true
+ var _doc = ""
+ var _version = ""
+
+ def internal(): ConfigBuilder = {
+ _public = false
+ this
+ }
+
+ def doc(s: String): ConfigBuilder = {
+ _doc = s
+ this
+ }
+
+ def version(v: String): ConfigBuilder = {
+ _version = v
+ this
+ }
+
+ def intConf: TypedConfigBuilder[Int] = {
+ new TypedConfigBuilder(this, toNumber(_, _.toInt, key, "int"))
+ }
+
+ def longConf: TypedConfigBuilder[Long] = {
+ new TypedConfigBuilder(this, toNumber(_, _.toLong, key, "long"))
+ }
+
+ def doubleConf: TypedConfigBuilder[Double] = {
+ new TypedConfigBuilder(this, toNumber(_, _.toDouble, key, "double"))
+ }
+
+ def booleanConf: TypedConfigBuilder[Boolean] = {
+ new TypedConfigBuilder(this, toBoolean(_, key))
+ }
+
+ def stringConf: TypedConfigBuilder[String] = {
+ new TypedConfigBuilder(this, v => v)
+ }
+
+ def timeConf(unit: TimeUnit): TypedConfigBuilder[Long] = {
+ new TypedConfigBuilder(this, timeFromString(_, unit), timeToString(_, unit))
+ }
+
+ def bytesConf(unit: ByteUnit): TypedConfigBuilder[Long] = {
+ new TypedConfigBuilder(this, byteFromString(_, unit), byteToString(_, unit))
+ }
+}
+
+private object ConfigEntry {
+ val UNDEFINED = ""
+}
diff --git a/common/src/main/scala/org/apache/comet/Constants.scala b/common/src/main/scala/org/apache/comet/Constants.scala
new file mode 100644
index 000000000..83b570fc3
--- /dev/null
+++ b/common/src/main/scala/org/apache/comet/Constants.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet
+
+object Constants {
+ val LOG_CONF_PATH = "comet.log.file.path"
+ val LOG_CONF_NAME = "log4rs.yaml"
+}
diff --git a/common/src/main/scala/org/apache/comet/package.scala b/common/src/main/scala/org/apache/comet/package.scala
new file mode 100644
index 000000000..c9aca7538
--- /dev/null
+++ b/common/src/main/scala/org/apache/comet/package.scala
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache
+
+import java.util.Properties
+
+package object comet {
+
+ /**
+ * Provides access to build information about the Comet libraries. This will be used by the
+ * benchmarking software to provide the source revision and repository. In addition, the build
+ * information is included to aid in future debugging efforts for releases.
+ */
+ private object CometBuildInfo {
+
+ val (
+ cometVersion: String,
+ cometBranch: String,
+ cometRevision: String,
+ cometBuildUserName: String,
+ cometBuildUserEmail: String,
+ cometRepoUrl: String,
+ cometBuildTimestamp: String) = {
+ val resourceStream = Thread
+ .currentThread()
+ .getContextClassLoader
+ .getResourceAsStream("comet-git-info.properties")
+ if (resourceStream == null) {
+ throw new CometRuntimeException("Could not find comet-git-info.properties")
+ }
+
+ try {
+ val unknownProp = ""
+ val props = new Properties()
+ props.load(resourceStream)
+ (
+ props.getProperty("git.build.version", unknownProp),
+ props.getProperty("git.branch", unknownProp),
+ props.getProperty("git.commit.id.full", unknownProp),
+ props.getProperty("git.build.user.name", unknownProp),
+ props.getProperty("git.build.user.email", unknownProp),
+ props.getProperty("git.remote.origin.url", unknownProp),
+ props.getProperty("git.build.time", unknownProp))
+ } catch {
+ case e: Exception =>
+ throw new CometRuntimeException(
+ "Error loading properties from comet-git-info.properties",
+ e)
+ } finally {
+ if (resourceStream != null) {
+ try {
+ resourceStream.close()
+ } catch {
+ case e: Exception =>
+ throw new CometRuntimeException("Error closing Comet build info resource stream", e)
+ }
+ }
+ }
+ }
+ }
+
+ val COMET_VERSION = CometBuildInfo.cometVersion
+ val COMET_BRANCH = CometBuildInfo.cometBranch
+ val COMET_REVISION = CometBuildInfo.cometRevision
+ val COMET_BUILD_USER_EMAIL = CometBuildInfo.cometBuildUserEmail
+ val COMET_BUILD_USER_NAME = CometBuildInfo.cometBuildUserName
+ val COMET_REPO_URL = CometBuildInfo.cometRepoUrl
+ val COMET_BUILD_TIMESTAMP = CometBuildInfo.cometBuildTimestamp
+
+}
diff --git a/common/src/main/scala/org/apache/comet/parquet/CometParquetUtils.scala b/common/src/main/scala/org/apache/comet/parquet/CometParquetUtils.scala
new file mode 100644
index 000000000..d851067b5
--- /dev/null
+++ b/common/src/main/scala/org/apache/comet/parquet/CometParquetUtils.scala
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types._
+
+object CometParquetUtils {
+ private val PARQUET_FIELD_ID_WRITE_ENABLED = "spark.sql.parquet.fieldId.write.enabled"
+ private val PARQUET_FIELD_ID_READ_ENABLED = "spark.sql.parquet.fieldId.read.enabled"
+ private val IGNORE_MISSING_PARQUET_FIELD_ID = "spark.sql.parquet.fieldId.read.ignoreMissing"
+
+ def writeFieldId(conf: SQLConf): Boolean =
+ conf.getConfString(PARQUET_FIELD_ID_WRITE_ENABLED, "false").toBoolean
+
+ def writeFieldId(conf: Configuration): Boolean =
+ conf.getBoolean(PARQUET_FIELD_ID_WRITE_ENABLED, false)
+
+ def readFieldId(conf: SQLConf): Boolean =
+ conf.getConfString(PARQUET_FIELD_ID_READ_ENABLED, "false").toBoolean
+
+ def ignoreMissingIds(conf: SQLConf): Boolean =
+ conf.getConfString(IGNORE_MISSING_PARQUET_FIELD_ID, "false").toBoolean
+
+ // The following is copied from QueryExecutionErrors
+ // TODO: remove after dropping Spark 3.2.0 support and directly use
+ // QueryExecutionErrors.foundDuplicateFieldInFieldIdLookupModeError
+ def foundDuplicateFieldInFieldIdLookupModeError(
+ requiredId: Int,
+ matchedFields: String): Throwable = {
+ new RuntimeException(s"""
+ |Found duplicate field(s) "$requiredId": $matchedFields
+ |in id mapping mode
+ """.stripMargin.replaceAll("\n", " "))
+ }
+
+  // The following are copied from org.apache.spark.sql.execution.datasources.parquet.ParquetUtils
+ // TODO: remove after dropping Spark 3.2.0 support and directly use ParquetUtils
+ /**
+ * A StructField metadata key used to set the field id of a column in the Parquet schema.
+ */
+ val FIELD_ID_METADATA_KEY = "parquet.field.id"
+
+ /**
+   * Whether any field in the schema, whether nested or leaf, has the Parquet field ID
+   * metadata.
+ */
+ def hasFieldIds(schema: StructType): Boolean = {
+ def recursiveCheck(schema: DataType): Boolean = {
+ schema match {
+ case st: StructType =>
+ st.exists(field => hasFieldId(field) || recursiveCheck(field.dataType))
+
+ case at: ArrayType => recursiveCheck(at.elementType)
+
+ case mt: MapType => recursiveCheck(mt.keyType) || recursiveCheck(mt.valueType)
+
+ case _ =>
+ // No need to really check primitive types, just to terminate the recursion
+ false
+ }
+ }
+ if (schema.isEmpty) false else recursiveCheck(schema)
+ }
+
+ def hasFieldId(field: StructField): Boolean =
+ field.metadata.contains(FIELD_ID_METADATA_KEY)
+
+ def getFieldId(field: StructField): Int = {
+ require(
+ hasFieldId(field),
+ s"The key `$FIELD_ID_METADATA_KEY` doesn't exist in the metadata of " + field)
+ try {
+ Math.toIntExact(field.metadata.getLong(FIELD_ID_METADATA_KEY))
+ } catch {
+ case _: ArithmeticException | _: ClassCastException =>
+ throw new IllegalArgumentException(
+ s"The key `$FIELD_ID_METADATA_KEY` must be a 32-bit integer")
+ }
+ }
+}
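As an illustrative sketch (the field name and ID below are made up), this is how the helpers above interact with Spark's field-ID metadata key:

import org.apache.spark.sql.types.{LongType, MetadataBuilder, StructField, StructType}

// Attach a Parquet field ID to a column through StructField metadata.
val meta = new MetadataBuilder()
  .putLong(CometParquetUtils.FIELD_ID_METADATA_KEY, 7L) // "parquet.field.id"
  .build()
val schema = StructType(Seq(StructField("price", LongType, nullable = true, meta)))

assert(CometParquetUtils.hasFieldIds(schema))           // true: at least one field carries an ID
assert(CometParquetUtils.getFieldId(schema.head) == 7)  // stored as a long, must fit in an Int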
diff --git a/common/src/main/scala/org/apache/comet/parquet/CometReaderThreadPool.scala b/common/src/main/scala/org/apache/comet/parquet/CometReaderThreadPool.scala
new file mode 100644
index 000000000..ca13bba0c
--- /dev/null
+++ b/common/src/main/scala/org/apache/comet/parquet/CometReaderThreadPool.scala
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet
+
+import java.util.concurrent.{Executors, ExecutorService, ThreadFactory}
+import java.util.concurrent.atomic.AtomicLong
+
+abstract class CometReaderThreadPool {
+ private var threadPool: Option[ExecutorService] = None
+
+ protected def threadNamePrefix: String
+
+ private def initThreadPool(maxThreads: Int): ExecutorService = synchronized {
+ if (threadPool.isEmpty) {
+ val threadFactory: ThreadFactory = new ThreadFactory() {
+ private val defaultThreadFactory = Executors.defaultThreadFactory
+ val count = new AtomicLong(0)
+
+ override def newThread(r: Runnable): Thread = {
+ val thread = defaultThreadFactory.newThread(r)
+ thread.setName(s"${threadNamePrefix}_${count.getAndIncrement()}")
+ thread.setDaemon(true)
+ thread
+ }
+ }
+
+ val threadPoolExecutor = Executors.newFixedThreadPool(maxThreads, threadFactory)
+ threadPool = Some(threadPoolExecutor)
+ }
+
+ threadPool.get
+ }
+
+ def getOrCreateThreadPool(numThreads: Int): ExecutorService = {
+ threadPool.getOrElse(initThreadPool(numThreads))
+ }
+
+}
+
+// A thread pool used for pre-fetching files.
+object CometPrefetchThreadPool extends CometReaderThreadPool {
+ override def threadNamePrefix: String = "prefetch_thread"
+}
+
+// Thread pool used by the Parquet parallel reader
+object CometFileReaderThreadPool extends CometReaderThreadPool {
+ override def threadNamePrefix: String = "file_reader_thread"
+}
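A brief usage sketch (the pool size is illustrative): the executor is created lazily on the first request and then shared, so the numThreads argument only takes effect on the first call.

import java.util.concurrent.ExecutorService

val pool: ExecutorService = CometPrefetchThreadPool.getOrCreateThreadPool(4)
pool.submit(new Runnable {
  override def run(): Unit = println("prefetching a Parquet split...")
})
// Subsequent calls return the same fixed-size pool regardless of the argument.
assert(pool eq CometPrefetchThreadPool.getOrCreateThreadPool(8))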
diff --git a/common/src/main/scala/org/apache/comet/shims/ShimBatchReader.scala b/common/src/main/scala/org/apache/comet/shims/ShimBatchReader.scala
new file mode 100644
index 000000000..ece4cfbe5
--- /dev/null
+++ b/common/src/main/scala/org/apache/comet/shims/ShimBatchReader.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.shims
+
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.execution.datasources.PartitionedFile
+
+object ShimBatchReader {
+
+ // TODO: remove after dropping Spark 3.2 & 3.3 support and directly call PartitionedFile
+ def newPartitionedFile(partitionValues: InternalRow, file: String): PartitionedFile =
+ classOf[PartitionedFile].getDeclaredConstructors
+ .map(c =>
+ c.getParameterCount match {
+ case 5 =>
+ c.newInstance(
+ partitionValues,
+ file,
+ Long.box(-1), // -1 means we read the entire file
+ Long.box(-1),
+ Array.empty[String])
+ case 7 =>
+ c.newInstance(
+ partitionValues,
+ c.getParameterTypes()(1)
+ .getConstructor(classOf[String])
+ .newInstance(file)
+ .asInstanceOf[AnyRef],
+ Long.box(-1), // -1 means we read the entire file
+ Long.box(-1),
+ Array.empty[String],
+ Long.box(0),
+ Long.box(0))
+ })
+ .head
+ .asInstanceOf[PartitionedFile]
+}
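Usage sketch (the path is made up): callers stay version-agnostic because the shim reflectively picks whichever PartitionedFile constructor the running Spark version exposes (5 arguments on 3.2/3.3, 7 on 3.4+).

import org.apache.spark.sql.catalyst.InternalRow

// Start/length of -1 in the shim mean "read the entire file".
val file = ShimBatchReader.newPartitionedFile(InternalRow.empty, "/tmp/part-00000.parquet")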
diff --git a/common/src/main/scala/org/apache/comet/shims/ShimFileFormat.scala b/common/src/main/scala/org/apache/comet/shims/ShimFileFormat.scala
new file mode 100644
index 000000000..5ab7eaf4f
--- /dev/null
+++ b/common/src/main/scala/org/apache/comet/shims/ShimFileFormat.scala
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.shims
+
+object ShimFileFormat {
+
+ // TODO: remove after dropping Spark 3.2 & 3.3 support and directly use FileFormat.ROW_INDEX
+ val ROW_INDEX = "row_index"
+
+ // A name for a temporary column that holds row indexes computed by the file format reader
+ // until they can be placed in the _metadata struct.
+ // TODO: remove after dropping Spark 3.2 & 3.3 support and directly use
+ // FileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME
+ val ROW_INDEX_TEMPORARY_COLUMN_NAME: String = s"_tmp_metadata_$ROW_INDEX"
+
+ // TODO: remove after dropping Spark 3.2 support and use FileFormat.OPTION_RETURNING_BATCH
+ val OPTION_RETURNING_BATCH = "returning_batch"
+}
diff --git a/common/src/main/scala/org/apache/comet/shims/ShimResolveDefaultColumns.scala b/common/src/main/scala/org/apache/comet/shims/ShimResolveDefaultColumns.scala
new file mode 100644
index 000000000..8a30c8e00
--- /dev/null
+++ b/common/src/main/scala/org/apache/comet/shims/ShimResolveDefaultColumns.scala
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.shims
+
+import scala.util.Try
+
+import org.apache.spark.sql.types.{StructField, StructType}
+
+object ShimResolveDefaultColumns {
+ // TODO: remove after dropping Spark 3.2 & 3.3 support and directly use ResolveDefaultColumns
+ def getExistenceDefaultValue(field: StructField): Any =
+ Try {
+ // scalastyle:off classforname
+ Class.forName("org.apache.spark.sql.catalyst.util.ResolveDefaultColumns$")
+ // scalastyle:on classforname
+ }.map { objClass =>
+ val objInstance = objClass.getField("MODULE$").get(null)
+ val method = objClass.getMethod("getExistenceDefaultValues", classOf[StructType])
+ method.invoke(objInstance, StructType(Seq(field))).asInstanceOf[Array[Any]].head
+ }.getOrElse(null)
+}
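A hedged sketch of the intended behaviour: on Spark versions that ship ResolveDefaultColumns (3.4+), the reflective call resolves the column's existence default; on older versions the Try fails and null is returned. The EXISTS_DEFAULT metadata key is the one Spark itself uses (see also the constant-column test later in this patch).

import org.apache.spark.sql.types.{LongType, MetadataBuilder, StructField}

val field = StructField(
  "f",
  LongType,
  nullable = false,
  new MetadataBuilder().putString("EXISTS_DEFAULT", "123").build())

// Spark 3.4+: the literal default (123); Spark 3.2/3.3: null.
val default = ShimResolveDefaultColumns.getExistenceDefaultValue(field)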
diff --git a/common/src/main/scala/org/apache/comet/vector/NativeUtil.scala b/common/src/main/scala/org/apache/comet/vector/NativeUtil.scala
new file mode 100644
index 000000000..1e27ed8f0
--- /dev/null
+++ b/common/src/main/scala/org/apache/comet/vector/NativeUtil.scala
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.vector
+
+import scala.collection.mutable
+
+import org.apache.arrow.c.{ArrowArray, ArrowImporter, ArrowSchema, CDataDictionaryProvider, Data}
+import org.apache.arrow.memory.RootAllocator
+import org.apache.arrow.vector._
+import org.apache.spark.SparkException
+import org.apache.spark.sql.vectorized.ColumnarBatch
+
+class NativeUtil {
+ private val allocator = new RootAllocator(Long.MaxValue)
+ private val dictionaryProvider: CDataDictionaryProvider = new CDataDictionaryProvider
+ private val importer = new ArrowImporter(allocator)
+
+ /**
+ * Exports a Comet `ColumnarBatch` into a list of memory addresses that can be consumed by the
+ * native execution.
+ *
+ * @param batch
+ * the input Comet columnar batch
+ * @return
+ * a list containing pairs of memory addresses in the format of (address of Arrow array,
+ * address of Arrow schema)
+ */
+ def exportBatch(batch: ColumnarBatch): Array[Long] = {
+ val vectors = (0 until batch.numCols()).flatMap { index =>
+ batch.column(index) match {
+ case a: CometVector =>
+ val valueVector = a.getValueVector
+
+ val provider = if (valueVector.getField.getDictionary != null) {
+ a.getDictionaryProvider
+ } else {
+ null
+ }
+
+ val arrowSchema = ArrowSchema.allocateNew(allocator)
+ val arrowArray = ArrowArray.allocateNew(allocator)
+ Data.exportVector(
+ allocator,
+ getFieldVector(valueVector),
+ provider,
+ arrowArray,
+ arrowSchema)
+
+ Seq((arrowArray, arrowSchema))
+ case c =>
+ throw new SparkException(
+ "Comet execution only takes Arrow Arrays, but got " +
+ s"${c.getClass}")
+ }
+ }
+
+ vectors.flatMap { pair =>
+ Seq(pair._1.memoryAddress(), pair._2.memoryAddress())
+ }.toArray
+ }
+
+ /**
+   * Imports a list of Arrow addresses from native execution, and returns a list of Comet vectors.
+   *
+   * @param arrayAddress
+   *   a list containing pairs of Arrow addresses from the native side, in the format of (address
+   *   of Arrow array, address of Arrow schema)
+ * @return
+ * a list of Comet vectors
+ */
+ def importVector(arrayAddress: Array[Long]): Seq[CometVector] = {
+ val arrayVectors = mutable.ArrayBuffer.empty[CometVector]
+
+ for (i <- arrayAddress.indices by 2) {
+ val arrowSchema = ArrowSchema.wrap(arrayAddress(i + 1))
+ val arrowArray = ArrowArray.wrap(arrayAddress(i))
+
+ // Native execution should always have 'useDecimal128' set to true since it doesn't support
+ // other cases.
+ arrayVectors += CometVector.getVector(
+ importer.importVector(arrowArray, arrowSchema, dictionaryProvider),
+ true,
+ dictionaryProvider)
+
+ arrowArray.close()
+ arrowSchema.close()
+ }
+ arrayVectors.toSeq
+ }
+
+ /**
+   * Takes zero-copy slices of the input batch with the given start index and maximum number of rows.
+ *
+ * @param batch
+ * Input batch
+ * @param startIndex
+ * Start index of the slice
+ * @param maxNumRows
+ * Maximum number of rows in the slice
+ * @return
+ * A new batch with the sliced vectors
+ */
+ def takeRows(batch: ColumnarBatch, startIndex: Int, maxNumRows: Int): ColumnarBatch = {
+ val arrayVectors = mutable.ArrayBuffer.empty[CometVector]
+
+ for (i <- 0 until batch.numCols()) {
+ val column = batch.column(i).asInstanceOf[CometVector]
+ arrayVectors += column.slice(startIndex, maxNumRows)
+ }
+
+ new ColumnarBatch(arrayVectors.toArray, maxNumRows)
+ }
+
+ private def getFieldVector(valueVector: ValueVector): FieldVector = {
+ valueVector match {
+ case v @ (_: BitVector | _: TinyIntVector | _: SmallIntVector | _: IntVector |
+ _: BigIntVector | _: Float4Vector | _: Float8Vector | _: VarCharVector |
+ _: DecimalVector | _: DateDayVector | _: TimeStampMicroTZVector | _: VarBinaryVector |
+ _: FixedSizeBinaryVector | _: TimeStampMicroVector) =>
+ v.asInstanceOf[FieldVector]
+ case _ => throw new SparkException(s"Unsupported Arrow Vector: ${valueVector.getClass}")
+ }
+ }
+}
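A rough sketch of the address layout, assuming `batch` is a Comet ColumnarBatch already in scope (e.g. produced by the scan): every column contributes an (Arrow array address, Arrow schema address) pair, and importVector expects the same pairing on the way back. In practice these addresses are handed to and returned from the native side rather than round-tripped directly like this.

val nativeUtil = new NativeUtil

val addresses: Array[Long] = nativeUtil.exportBatch(batch) // [arr0, schema0, arr1, schema1, ...]
assert(addresses.length == batch.numCols() * 2)

val vectors: Seq[CometVector] = nativeUtil.importVector(addresses)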
diff --git a/common/src/main/scala/org/apache/comet/vector/StreamReader.scala b/common/src/main/scala/org/apache/comet/vector/StreamReader.scala
new file mode 100644
index 000000000..9c4f99602
--- /dev/null
+++ b/common/src/main/scala/org/apache/comet/vector/StreamReader.scala
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.vector
+
+import java.nio.channels.ReadableByteChannel
+
+import scala.collection.JavaConverters.collectionAsScalaIterableConverter
+
+import org.apache.arrow.memory.RootAllocator
+import org.apache.arrow.vector.VectorSchemaRoot
+import org.apache.arrow.vector.ipc.{ArrowStreamReader, ReadChannel}
+import org.apache.arrow.vector.ipc.message.MessageChannelReader
+import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector}
+
+/**
+ * A reader that consumes Arrow data from an input channel, and produces Comet batches.
+ */
+case class StreamReader(channel: ReadableByteChannel) extends AutoCloseable {
+ private var allocator = new RootAllocator(Long.MaxValue)
+ private val channelReader = new MessageChannelReader(new ReadChannel(channel), allocator)
+ private var arrowReader = new ArrowStreamReader(channelReader, allocator)
+ private var root = arrowReader.getVectorSchemaRoot
+
+ def nextBatch(): Option[ColumnarBatch] = {
+ if (arrowReader.loadNextBatch()) {
+ Some(rootAsBatch(root))
+ } else {
+ None
+ }
+ }
+
+ private def rootAsBatch(root: VectorSchemaRoot): ColumnarBatch = {
+ val columns = root.getFieldVectors.asScala.map { vector =>
+ // Native shuffle always uses decimal128.
+ CometVector.getVector(vector, true, arrowReader).asInstanceOf[ColumnVector]
+ }.toArray
+ val batch = new ColumnarBatch(columns)
+ batch.setNumRows(root.getRowCount)
+ batch
+ }
+
+ override def close(): Unit = {
+ if (root != null) {
+ arrowReader.close()
+ root.close()
+ allocator.close()
+
+ arrowReader = null
+ root = null
+ allocator = null
+ }
+ }
+}
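A consumption sketch, assuming `channel` is a ReadableByteChannel carrying an Arrow IPC stream (for example, shuffle output): batches are pulled until nextBatch() returns None, then close() releases the Arrow allocator.

val reader = StreamReader(channel)
try {
  var batch = reader.nextBatch()
  while (batch.isDefined) {
    println(s"read ${batch.get.numRows()} rows")
    batch = reader.nextBatch()
  }
} finally {
  reader.close()
}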
diff --git a/common/src/main/scala/org/apache/spark/sql/comet/parquet/CometParquetReadSupport.scala b/common/src/main/scala/org/apache/spark/sql/comet/parquet/CometParquetReadSupport.scala
new file mode 100644
index 000000000..0e8a190c2
--- /dev/null
+++ b/common/src/main/scala/org/apache/spark/sql/comet/parquet/CometParquetReadSupport.scala
@@ -0,0 +1,378 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.comet.parquet
+
+import java.util.{Locale, UUID}
+
+import scala.collection.JavaConverters._
+
+import org.apache.parquet.schema._
+import org.apache.parquet.schema.LogicalTypeAnnotation.ListLogicalTypeAnnotation
+import org.apache.parquet.schema.Type.Repetition
+import org.apache.spark.sql.errors.QueryExecutionErrors
+import org.apache.spark.sql.types._
+
+import org.apache.comet.parquet.CometParquetUtils
+
+/**
+ * This class is copied & slightly modified from [[ParquetReadSupport]] in Spark. Changes:
+ *   - This doesn't extend Parquet's `ReadSupport` class, since that is only used by the
+ *     row-based Parquet reader. Therefore `init`, `prepareForRead` and other unused methods are
+ *     omitted.
+ */
+object CometParquetReadSupport {
+ val SPARK_PARQUET_SCHEMA_NAME = "spark_schema"
+
+ val EMPTY_MESSAGE: MessageType =
+ Types.buildMessage().named(SPARK_PARQUET_SCHEMA_NAME)
+
+ def generateFakeColumnName: String = s"_fake_name_${UUID.randomUUID()}"
+
+ def clipParquetSchema(
+ parquetSchema: MessageType,
+ catalystSchema: StructType,
+ caseSensitive: Boolean,
+ useFieldId: Boolean,
+ ignoreMissingIds: Boolean): MessageType = {
+ if (!ignoreMissingIds &&
+ !containsFieldIds(parquetSchema) &&
+ CometParquetUtils.hasFieldIds(catalystSchema)) {
+ throw new RuntimeException(
+ "Spark read schema expects field Ids, " +
+ "but Parquet file schema doesn't contain any field Ids.\n" +
+ "Please remove the field ids from Spark schema or ignore missing ids by " +
+ "setting `spark.sql.parquet.fieldId.read.ignoreMissing = true`\n" +
+ s"""
+ |Spark read schema:
+ |${catalystSchema.prettyJson}
+ |
+ |Parquet file schema:
+ |${parquetSchema.toString}
+ |""".stripMargin)
+ }
+ clipParquetSchema(parquetSchema, catalystSchema, caseSensitive, useFieldId)
+ }
+
+ /**
+   * Tailors `parquetSchema` according to `catalystSchema` by removing column paths that don't
+   * exist in `catalystSchema`, and adding those that only exist in `catalystSchema`.
+ */
+ def clipParquetSchema(
+ parquetSchema: MessageType,
+ catalystSchema: StructType,
+ caseSensitive: Boolean,
+ useFieldId: Boolean): MessageType = {
+ val clippedParquetFields = clipParquetGroupFields(
+ parquetSchema.asGroupType(),
+ catalystSchema,
+ caseSensitive,
+ useFieldId)
+ if (clippedParquetFields.isEmpty) {
+ EMPTY_MESSAGE
+ } else {
+ Types
+ .buildMessage()
+ .addFields(clippedParquetFields: _*)
+ .named(SPARK_PARQUET_SCHEMA_NAME)
+ }
+ }
+
+ private def clipParquetType(
+ parquetType: Type,
+ catalystType: DataType,
+ caseSensitive: Boolean,
+ useFieldId: Boolean): Type = {
+ val newParquetType = catalystType match {
+ case t: ArrayType if !isPrimitiveCatalystType(t.elementType) =>
+ // Only clips array types with nested type as element type.
+ clipParquetListType(parquetType.asGroupType(), t.elementType, caseSensitive, useFieldId)
+
+ case t: MapType
+ if !isPrimitiveCatalystType(t.keyType) ||
+ !isPrimitiveCatalystType(t.valueType) =>
+ // Only clips map types with nested key type or value type
+ clipParquetMapType(
+ parquetType.asGroupType(),
+ t.keyType,
+ t.valueType,
+ caseSensitive,
+ useFieldId)
+
+ case t: StructType =>
+ clipParquetGroup(parquetType.asGroupType(), t, caseSensitive, useFieldId)
+
+ case _ =>
+        // UDTs and primitive types are not clipped. For UDTs, a clipped version might not map
+        // to the desired user-space types, so UDTs shouldn't participate in schema merging.
+ parquetType
+ }
+
+ if (useFieldId && parquetType.getId != null) {
+ newParquetType.withId(parquetType.getId.intValue())
+ } else {
+ newParquetType
+ }
+ }
+
+ /**
+ * Whether a Catalyst [[DataType]] is primitive. Primitive [[DataType]] is not equivalent to
+ * [[AtomicType]]. For example, [[CalendarIntervalType]] is primitive, but it's not an
+ * [[AtomicType]].
+ */
+ private def isPrimitiveCatalystType(dataType: DataType): Boolean = {
+ dataType match {
+ case _: ArrayType | _: MapType | _: StructType => false
+ case _ => true
+ }
+ }
+
+ /**
+ * Clips a Parquet [[GroupType]] which corresponds to a Catalyst [[ArrayType]]. The element type
+ * of the [[ArrayType]] should also be a nested type, namely an [[ArrayType]], a [[MapType]], or
+ * a [[StructType]].
+ */
+ private def clipParquetListType(
+ parquetList: GroupType,
+ elementType: DataType,
+ caseSensitive: Boolean,
+ useFieldId: Boolean): Type = {
+ // Precondition of this method, should only be called for lists with nested element types.
+ assert(!isPrimitiveCatalystType(elementType))
+
+ // Unannotated repeated group should be interpreted as required list of required element, so
+ // list element type is just the group itself. Clip it.
+ if (parquetList.getLogicalTypeAnnotation == null &&
+ parquetList.isRepetition(Repetition.REPEATED)) {
+ clipParquetType(parquetList, elementType, caseSensitive, useFieldId)
+ } else {
+ assert(
+ parquetList.getLogicalTypeAnnotation.isInstanceOf[ListLogicalTypeAnnotation],
+ "Invalid Parquet schema. " +
+ "Logical type annotation of annotated Parquet lists must be ListLogicalTypeAnnotation: " +
+ parquetList.toString)
+
+ assert(
+ parquetList.getFieldCount == 1 && parquetList
+ .getType(0)
+ .isRepetition(Repetition.REPEATED),
+ "Invalid Parquet schema. " +
+ "LIST-annotated group should only have exactly one repeated field: " +
+ parquetList)
+
+ // Precondition of this method, should only be called for lists with nested element types.
+ assert(!parquetList.getType(0).isPrimitive)
+
+ val repeatedGroup = parquetList.getType(0).asGroupType()
+
+ // If the repeated field is a group with multiple fields, or the repeated field is a group
+ // with one field and is named either "array" or uses the LIST-annotated group's name with
+ // "_tuple" appended then the repeated type is the element type and elements are required.
+ // Build a new LIST-annotated group with clipped `repeatedGroup` as element type and the
+ // only field.
+ if (repeatedGroup.getFieldCount > 1 ||
+ repeatedGroup.getName == "array" ||
+ repeatedGroup.getName == parquetList.getName + "_tuple") {
+ Types
+ .buildGroup(parquetList.getRepetition)
+ .as(LogicalTypeAnnotation.listType())
+ .addField(clipParquetType(repeatedGroup, elementType, caseSensitive, useFieldId))
+ .named(parquetList.getName)
+ } else {
+ // Otherwise, the repeated field's type is the element type with the repeated field's
+ // repetition.
+ val newRepeatedGroup = Types
+ .repeatedGroup()
+ .addField(
+ clipParquetType(repeatedGroup.getType(0), elementType, caseSensitive, useFieldId))
+ .named(repeatedGroup.getName)
+
+ val newElementType = if (useFieldId && repeatedGroup.getId != null) {
+ newRepeatedGroup.withId(repeatedGroup.getId.intValue())
+ } else {
+ newRepeatedGroup
+ }
+
+ Types
+ .buildGroup(parquetList.getRepetition)
+ .as(LogicalTypeAnnotation.listType())
+ .addField(newElementType)
+ .named(parquetList.getName)
+ }
+ }
+ }
+
+ /**
+ * Clips a Parquet [[GroupType]] which corresponds to a Catalyst [[MapType]]. Either key type or
+ * value type of the [[MapType]] must be a nested type, namely an [[ArrayType]], a [[MapType]],
+ * or a [[StructType]].
+ */
+ private def clipParquetMapType(
+ parquetMap: GroupType,
+ keyType: DataType,
+ valueType: DataType,
+ caseSensitive: Boolean,
+ useFieldId: Boolean): GroupType = {
+ // Precondition of this method, only handles maps with nested key types or value types.
+ assert(!isPrimitiveCatalystType(keyType) || !isPrimitiveCatalystType(valueType))
+
+ val repeatedGroup = parquetMap.getType(0).asGroupType()
+ val parquetKeyType = repeatedGroup.getType(0)
+ val parquetValueType = repeatedGroup.getType(1)
+
+ val clippedRepeatedGroup = {
+ val newRepeatedGroup = Types
+ .repeatedGroup()
+ .as(repeatedGroup.getLogicalTypeAnnotation)
+ .addField(clipParquetType(parquetKeyType, keyType, caseSensitive, useFieldId))
+ .addField(clipParquetType(parquetValueType, valueType, caseSensitive, useFieldId))
+ .named(repeatedGroup.getName)
+ if (useFieldId && repeatedGroup.getId != null) {
+ newRepeatedGroup.withId(repeatedGroup.getId.intValue())
+ } else {
+ newRepeatedGroup
+ }
+ }
+
+ Types
+ .buildGroup(parquetMap.getRepetition)
+ .as(parquetMap.getLogicalTypeAnnotation)
+ .addField(clippedRepeatedGroup)
+ .named(parquetMap.getName)
+ }
+
+ /**
+ * Clips a Parquet [[GroupType]] which corresponds to a Catalyst [[StructType]].
+ *
+ * @return
+ * A clipped [[GroupType]], which has at least one field.
+ * @note
+   *   Parquet doesn't allow creating empty [[GroupType]] instances except for the empty
+   *   [[MessageType]], even though it's legal to construct an empty requested schema for
+   *   column pruning.
+ */
+ private def clipParquetGroup(
+ parquetRecord: GroupType,
+ structType: StructType,
+ caseSensitive: Boolean,
+ useFieldId: Boolean): GroupType = {
+ val clippedParquetFields =
+ clipParquetGroupFields(parquetRecord, structType, caseSensitive, useFieldId)
+ Types
+ .buildGroup(parquetRecord.getRepetition)
+ .as(parquetRecord.getLogicalTypeAnnotation)
+ .addFields(clippedParquetFields: _*)
+ .named(parquetRecord.getName)
+ }
+
+ /**
+ * Clips a Parquet [[GroupType]] which corresponds to a Catalyst [[StructType]].
+ *
+ * @return
+ * A list of clipped [[GroupType]] fields, which can be empty.
+ */
+ private def clipParquetGroupFields(
+ parquetRecord: GroupType,
+ structType: StructType,
+ caseSensitive: Boolean,
+ useFieldId: Boolean): Seq[Type] = {
+ val toParquet = new CometSparkToParquetSchemaConverter(
+ writeLegacyParquetFormat = false,
+ useFieldId = useFieldId)
+ lazy val caseSensitiveParquetFieldMap =
+ parquetRecord.getFields.asScala.map(f => f.getName -> f).toMap
+ lazy val caseInsensitiveParquetFieldMap =
+ parquetRecord.getFields.asScala.groupBy(_.getName.toLowerCase(Locale.ROOT))
+ lazy val idToParquetFieldMap =
+ parquetRecord.getFields.asScala.filter(_.getId != null).groupBy(f => f.getId.intValue())
+
+ def matchCaseSensitiveField(f: StructField): Type = {
+ caseSensitiveParquetFieldMap
+ .get(f.name)
+ .map(clipParquetType(_, f.dataType, caseSensitive, useFieldId))
+ .getOrElse(toParquet.convertField(f))
+ }
+
+ def matchCaseInsensitiveField(f: StructField): Type = {
+ // Do case-insensitive resolution only if in case-insensitive mode
+ caseInsensitiveParquetFieldMap
+ .get(f.name.toLowerCase(Locale.ROOT))
+ .map { parquetTypes =>
+ if (parquetTypes.size > 1) {
+ // Need to fail if there is ambiguity, i.e. more than one field is matched
+ val parquetTypesString = parquetTypes.map(_.getName).mkString("[", ", ", "]")
+ throw QueryExecutionErrors.foundDuplicateFieldInCaseInsensitiveModeError(
+ f.name,
+ parquetTypesString)
+ } else {
+ clipParquetType(parquetTypes.head, f.dataType, caseSensitive, useFieldId)
+ }
+ }
+ .getOrElse(toParquet.convertField(f))
+ }
+
+ def matchIdField(f: StructField): Type = {
+ val fieldId = CometParquetUtils.getFieldId(f)
+ idToParquetFieldMap
+ .get(fieldId)
+ .map { parquetTypes =>
+ if (parquetTypes.size > 1) {
+ // Need to fail if there is ambiguity, i.e. more than one field is matched
+ val parquetTypesString = parquetTypes.map(_.getName).mkString("[", ", ", "]")
+ throw CometParquetUtils.foundDuplicateFieldInFieldIdLookupModeError(
+ fieldId,
+ parquetTypesString)
+ } else {
+ clipParquetType(parquetTypes.head, f.dataType, caseSensitive, useFieldId)
+ }
+ }
+ .getOrElse {
+ // When there is no ID match, we use a fake name to avoid a name match by accident
+ // We need this name to be unique as well, otherwise there will be type conflicts
+ toParquet.convertField(f.copy(name = generateFakeColumnName))
+ }
+ }
+
+ val shouldMatchById = useFieldId && CometParquetUtils.hasFieldIds(structType)
+ structType.map { f =>
+ if (shouldMatchById && CometParquetUtils.hasFieldId(f)) {
+ matchIdField(f)
+ } else if (caseSensitive) {
+ matchCaseSensitiveField(f)
+ } else {
+ matchCaseInsensitiveField(f)
+ }
+ }
+ }
+
+ /**
+ * Whether the parquet schema contains any field IDs.
+ */
+ private def containsFieldIds(schema: Type): Boolean = schema match {
+ case p: PrimitiveType => p.getId != null
+ // We don't require all fields to have IDs, so we use `exists` here.
+ case g: GroupType => g.getId != null || g.getFields.asScala.exists(containsFieldIds)
+ }
+}
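For illustration, a minimal invocation of the clipping entry point under assumed inputs: fileSchema is the MessageType read from the Parquet footer and requiredSchema is Spark's pruned read schema, both assumed to be in scope. The result keeps only the requested column paths (resolved case-insensitively here) and substitutes freshly converted fields for columns missing from the file.

val clipped = CometParquetReadSupport.clipParquetSchema(
  fileSchema,      // MessageType from the Parquet file footer (assumed)
  requiredSchema,  // Spark's pruned StructType (assumed)
  caseSensitive = false,
  useFieldId = false,
  ignoreMissingIds = true)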
diff --git a/common/src/main/scala/org/apache/spark/sql/comet/parquet/CometSparkToParquetSchemaConverter.scala b/common/src/main/scala/org/apache/spark/sql/comet/parquet/CometSparkToParquetSchemaConverter.scala
new file mode 100644
index 000000000..2c8187e18
--- /dev/null
+++ b/common/src/main/scala/org/apache/spark/sql/comet/parquet/CometSparkToParquetSchemaConverter.scala
@@ -0,0 +1,322 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.comet.parquet
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.parquet.schema._
+import org.apache.parquet.schema.LogicalTypeAnnotation._
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._
+import org.apache.parquet.schema.Type.Repetition._
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types._
+
+import org.apache.comet.parquet.CometParquetUtils
+
+/**
+ * This class is copied & modified from Spark's [[SparkToParquetSchemaConverter]] class.
+ */
+class CometSparkToParquetSchemaConverter(
+ writeLegacyParquetFormat: Boolean = SQLConf.PARQUET_WRITE_LEGACY_FORMAT.defaultValue.get,
+ outputTimestampType: SQLConf.ParquetOutputTimestampType.Value =
+ SQLConf.ParquetOutputTimestampType.INT96,
+ useFieldId: Boolean = CometParquetUtils.writeFieldId(new SQLConf)) {
+
+ def this(conf: SQLConf) = this(
+ writeLegacyParquetFormat = conf.writeLegacyParquetFormat,
+ outputTimestampType = conf.parquetOutputTimestampType,
+ useFieldId = CometParquetUtils.writeFieldId(conf))
+
+ def this(conf: Configuration) = this(
+ writeLegacyParquetFormat = conf.get(SQLConf.PARQUET_WRITE_LEGACY_FORMAT.key).toBoolean,
+ outputTimestampType = SQLConf.ParquetOutputTimestampType.withName(
+ conf.get(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key)),
+ useFieldId = CometParquetUtils.writeFieldId(conf))
+
+ /**
+ * Converts a Spark SQL [[StructType]] to a Parquet [[MessageType]].
+ */
+ def convert(catalystSchema: StructType): MessageType = {
+ Types
+ .buildMessage()
+ .addFields(catalystSchema.map(convertField): _*)
+ .named(ParquetSchemaConverter.SPARK_PARQUET_SCHEMA_NAME)
+ }
+
+ /**
+ * Converts a Spark SQL [[StructField]] to a Parquet [[Type]].
+ */
+ def convertField(field: StructField): Type = {
+ val converted = convertField(field, if (field.nullable) OPTIONAL else REQUIRED)
+ if (useFieldId && CometParquetUtils.hasFieldId(field)) {
+ converted.withId(CometParquetUtils.getFieldId(field))
+ } else {
+ converted
+ }
+ }
+
+ private def convertField(field: StructField, repetition: Type.Repetition): Type = {
+
+ field.dataType match {
+ // ===================
+ // Simple atomic types
+ // ===================
+
+ case BooleanType =>
+ Types.primitive(BOOLEAN, repetition).named(field.name)
+
+ case ByteType =>
+ Types
+ .primitive(INT32, repetition)
+ .as(LogicalTypeAnnotation.intType(8, true))
+ .named(field.name)
+
+ case ShortType =>
+ Types
+ .primitive(INT32, repetition)
+ .as(LogicalTypeAnnotation.intType(16, true))
+ .named(field.name)
+
+ case IntegerType =>
+ Types.primitive(INT32, repetition).named(field.name)
+
+ case LongType =>
+ Types.primitive(INT64, repetition).named(field.name)
+
+ case FloatType =>
+ Types.primitive(FLOAT, repetition).named(field.name)
+
+ case DoubleType =>
+ Types.primitive(DOUBLE, repetition).named(field.name)
+
+ case StringType =>
+ Types
+ .primitive(BINARY, repetition)
+ .as(LogicalTypeAnnotation.stringType())
+ .named(field.name)
+
+ case DateType =>
+ Types
+ .primitive(INT32, repetition)
+ .as(LogicalTypeAnnotation.dateType())
+ .named(field.name)
+
+ // NOTE: Spark SQL can write timestamp values to Parquet using INT96, TIMESTAMP_MICROS or
+ // TIMESTAMP_MILLIS. TIMESTAMP_MICROS is recommended but INT96 is the default to keep the
+      // behavior the same as before.
+ //
+ // As stated in PARQUET-323, Parquet `INT96` was originally introduced to represent nanosecond
+ // timestamp in Impala for some historical reasons. It's not recommended to be used for any
+ // other types and will probably be deprecated in some future version of parquet-format spec.
+ // That's the reason why parquet-format spec only defines `TIMESTAMP_MILLIS` and
+ // `TIMESTAMP_MICROS` which are both logical types annotating `INT64`.
+ //
+ // Originally, Spark SQL uses the same nanosecond timestamp type as Impala and Hive. Starting
+ // from Spark 1.5.0, we resort to a timestamp type with microsecond precision so that we can
+ // store a timestamp into a `Long`. This design decision is subject to change though, for
+ // example, we may resort to nanosecond precision in the future.
+ case TimestampType =>
+ outputTimestampType match {
+ case SQLConf.ParquetOutputTimestampType.INT96 =>
+ Types.primitive(INT96, repetition).named(field.name)
+ case SQLConf.ParquetOutputTimestampType.TIMESTAMP_MICROS =>
+ Types
+ .primitive(INT64, repetition)
+ .as(LogicalTypeAnnotation.timestampType(true, TimeUnit.MICROS))
+ .named(field.name)
+ case SQLConf.ParquetOutputTimestampType.TIMESTAMP_MILLIS =>
+ Types
+ .primitive(INT64, repetition)
+ .as(LogicalTypeAnnotation.timestampType(true, TimeUnit.MILLIS))
+ .named(field.name)
+ }
+
+ case TimestampNTZType =>
+ Types
+ .primitive(INT64, repetition)
+ .as(LogicalTypeAnnotation.timestampType(false, TimeUnit.MICROS))
+ .named(field.name)
+
+ case BinaryType =>
+ Types.primitive(BINARY, repetition).named(field.name)
+
+ // ======================
+ // Decimals (legacy mode)
+ // ======================
+
+ // Spark 1.4.x and prior versions only support decimals with a maximum precision of 18 and
+ // always store decimals in fixed-length byte arrays. To keep compatibility with these older
+ // versions, here we convert decimals with all precisions to `FIXED_LEN_BYTE_ARRAY` annotated
+ // by `DECIMAL`.
+ case DecimalType.Fixed(precision, scale) if writeLegacyParquetFormat =>
+ Types
+ .primitive(FIXED_LEN_BYTE_ARRAY, repetition)
+ .as(LogicalTypeAnnotation.decimalType(scale, precision))
+ .length(Decimal.minBytesForPrecision(precision))
+ .named(field.name)
+
+ // ========================
+ // Decimals (standard mode)
+ // ========================
+
+ // Uses INT32 for 1 <= precision <= 9
+ case DecimalType.Fixed(precision, scale)
+ if precision <= Decimal.MAX_INT_DIGITS && !writeLegacyParquetFormat =>
+ Types
+ .primitive(INT32, repetition)
+ .as(LogicalTypeAnnotation.decimalType(scale, precision))
+ .named(field.name)
+
+ // Uses INT64 for 1 <= precision <= 18
+ case DecimalType.Fixed(precision, scale)
+ if precision <= Decimal.MAX_LONG_DIGITS && !writeLegacyParquetFormat =>
+ Types
+ .primitive(INT64, repetition)
+ .as(LogicalTypeAnnotation.decimalType(scale, precision))
+ .named(field.name)
+
+ // Uses FIXED_LEN_BYTE_ARRAY for all other precisions
+ case DecimalType.Fixed(precision, scale) if !writeLegacyParquetFormat =>
+ Types
+ .primitive(FIXED_LEN_BYTE_ARRAY, repetition)
+ .as(LogicalTypeAnnotation.decimalType(scale, precision))
+ .length(Decimal.minBytesForPrecision(precision))
+ .named(field.name)
+
+ // ===================================
+ // ArrayType and MapType (legacy mode)
+ // ===================================
+
+ // Spark 1.4.x and prior versions convert `ArrayType` with nullable elements into a 3-level
+ // `LIST` structure. This behavior is somewhat a hybrid of parquet-hive and parquet-avro
+ // (1.6.0rc3): the 3-level structure is similar to parquet-hive while the 3rd level element
+ // field name "array" is borrowed from parquet-avro.
+ case ArrayType(elementType, nullable @ true) if writeLegacyParquetFormat =>
+        // <list-repetition> group <name> (LIST) {
+        //   optional group bag {
+        //     repeated <element-type> array;
+ // }
+ // }
+
+ // This should not use `listOfElements` here because this new method checks if the
+ // element name is `element` in the `GroupType` and throws an exception if not.
+ // As mentioned above, Spark prior to 1.4.x writes `ArrayType` as `LIST` but with
+        // `array` as its element name as below. Therefore, we manually build the correct group
+        // type here via the builder. (See SPARK-16777)
+ Types
+ .buildGroup(repetition)
+ .as(LogicalTypeAnnotation.listType())
+ .addField(
+ Types
+ .buildGroup(REPEATED)
+ // "array" is the name chosen by parquet-hive (1.7.0 and prior version)
+ .addField(convertField(StructField("array", elementType, nullable)))
+ .named("bag"))
+ .named(field.name)
+
+ // Spark 1.4.x and prior versions convert ArrayType with non-nullable elements into a 2-level
+ // LIST structure. This behavior mimics parquet-avro (1.6.0rc3). Note that this case is
+ // covered by the backwards-compatibility rules implemented in `isElementType()`.
+ case ArrayType(elementType, nullable @ false) if writeLegacyParquetFormat =>
+        // <list-repetition> group <name> (LIST) {
+        //   repeated <element-type> element;
+ // }
+
+ // Here too, we should not use `listOfElements`. (See SPARK-16777)
+ Types
+ .buildGroup(repetition)
+ .as(LogicalTypeAnnotation.listType())
+ // "array" is the name chosen by parquet-avro (1.7.0 and prior version)
+ .addField(convertField(StructField("array", elementType, nullable), REPEATED))
+ .named(field.name)
+
+ // Spark 1.4.x and prior versions convert MapType into a 3-level group annotated by
+ // MAP_KEY_VALUE. This is covered by `convertGroupField(field: GroupType): DataType`.
+ case MapType(keyType, valueType, valueContainsNull) if writeLegacyParquetFormat =>
+        // <map-repetition> group <name> (MAP) {
+        //   repeated group map (MAP_KEY_VALUE) {
+        //     required <key-type> key;
+        //     <value-repetition> <value-type> value;
+ // }
+ // }
+ ConversionPatterns.mapType(
+ repetition,
+ field.name,
+ convertField(StructField("key", keyType, nullable = false)),
+ convertField(StructField("value", valueType, valueContainsNull)))
+
+ // =====================================
+ // ArrayType and MapType (standard mode)
+ // =====================================
+
+ case ArrayType(elementType, containsNull) if !writeLegacyParquetFormat =>
+        // <list-repetition> group <name> (LIST) {
+        //   repeated group list {
+        //     <element-repetition> <element-type> element;
+ // }
+ // }
+ Types
+ .buildGroup(repetition)
+ .as(LogicalTypeAnnotation.listType())
+ .addField(
+ Types
+ .repeatedGroup()
+ .addField(convertField(StructField("element", elementType, containsNull)))
+ .named("list"))
+ .named(field.name)
+
+ case MapType(keyType, valueType, valueContainsNull) =>
+        // <map-repetition> group <name> (MAP) {
+        //   repeated group key_value {
+        //     required <key-type> key;
+        //     <value-repetition> <value-type> value;
+ // }
+ // }
+ Types
+ .buildGroup(repetition)
+ .as(LogicalTypeAnnotation.mapType())
+ .addField(
+ Types
+ .repeatedGroup()
+ .addField(convertField(StructField("key", keyType, nullable = false)))
+ .addField(convertField(StructField("value", valueType, valueContainsNull)))
+ .named("key_value"))
+ .named(field.name)
+
+ // ===========
+ // Other types
+ // ===========
+
+ case StructType(fields) =>
+ fields
+ .foldLeft(Types.buildGroup(repetition)) { (builder, field) =>
+ builder.addField(convertField(field))
+ }
+ .named(field.name)
+
+ case udt: UserDefinedType[_] =>
+ convertField(field.copy(dataType = udt.sqlType))
+
+ case _ =>
+ throw QueryCompilationErrors.cannotConvertDataTypeToParquetTypeError(field)
+ }
+ }
+}
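A small sketch of the converter in use; the schema is illustrative. With the standard (non-legacy) layout described above, decimal(5, 2) maps to an INT32 column annotated with the DECIMAL logical type.

import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._

val converter = new CometSparkToParquetSchemaConverter(new SQLConf)
val message = converter.convert(
  StructType(Seq(
    StructField("id", LongType, nullable = false),
    StructField("price", DecimalType(5, 2), nullable = true))))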
diff --git a/common/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala b/common/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala
new file mode 100644
index 000000000..9e0541f44
--- /dev/null
+++ b/common/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.comet.util
+
+import java.io.File
+
+import scala.collection.JavaConverters._
+
+import org.apache.arrow.vector.complex.MapVector
+import org.apache.arrow.vector.types._
+import org.apache.arrow.vector.types.pojo.{ArrowType, Field, FieldType, Schema}
+import org.apache.spark.sql.types._
+
+object Utils {
+ def getConfPath(confFileName: String): String = {
+ sys.env
+ .get("COMET_CONF_DIR")
+ .map { t => new File(s"$t${File.separator}$confFileName") }
+ .filter(_.isFile)
+ .map(_.getAbsolutePath)
+ .orNull
+ }
+
+ def stringToSeq(str: String): Seq[String] = {
+ str.split(",").map(_.trim()).filter(_.nonEmpty)
+ }
+
+ def fromArrowField(field: Field): DataType = {
+ field.getType match {
+ case _: ArrowType.Map =>
+ val elementField = field.getChildren.get(0)
+ val keyType = fromArrowField(elementField.getChildren.get(0))
+ val valueType = fromArrowField(elementField.getChildren.get(1))
+ MapType(keyType, valueType, elementField.getChildren.get(1).isNullable)
+ case ArrowType.List.INSTANCE =>
+ val elementField = field.getChildren().get(0)
+ val elementType = fromArrowField(elementField)
+ ArrayType(elementType, containsNull = elementField.isNullable)
+ case ArrowType.Struct.INSTANCE =>
+ val fields = field.getChildren().asScala.map { child =>
+ val dt = fromArrowField(child)
+ StructField(child.getName, dt, child.isNullable)
+ }
+ StructType(fields.toSeq)
+ case arrowType => fromArrowType(arrowType)
+ }
+ }
+
+ def fromArrowType(dt: ArrowType): DataType = dt match {
+ case ArrowType.Bool.INSTANCE => BooleanType
+ case int: ArrowType.Int if int.getIsSigned && int.getBitWidth == 8 => ByteType
+ case int: ArrowType.Int if int.getIsSigned && int.getBitWidth == 8 * 2 => ShortType
+ case int: ArrowType.Int if int.getIsSigned && int.getBitWidth == 8 * 4 => IntegerType
+ case int: ArrowType.Int if int.getIsSigned && int.getBitWidth == 8 * 8 => LongType
+ case float: ArrowType.FloatingPoint if float.getPrecision == FloatingPointPrecision.SINGLE =>
+ FloatType
+ case float: ArrowType.FloatingPoint if float.getPrecision == FloatingPointPrecision.DOUBLE =>
+ DoubleType
+ case ArrowType.Utf8.INSTANCE => StringType
+ case ArrowType.Binary.INSTANCE => BinaryType
+ case _: ArrowType.FixedSizeBinary => BinaryType
+ case d: ArrowType.Decimal => DecimalType(d.getPrecision, d.getScale)
+ case date: ArrowType.Date if date.getUnit == DateUnit.DAY => DateType
+ case ts: ArrowType.Timestamp if ts.getUnit == TimeUnit.MICROSECOND => TimestampType
+ case ArrowType.Null.INSTANCE => NullType
+ case yi: ArrowType.Interval if yi.getUnit == IntervalUnit.YEAR_MONTH =>
+ YearMonthIntervalType()
+ case di: ArrowType.Interval if di.getUnit == IntervalUnit.DAY_TIME => DayTimeIntervalType()
+ case _ => throw new UnsupportedOperationException(s"Unsupported data type: ${dt.toString}")
+ }
+
+ /** Maps data type from Spark to Arrow. NOTE: timeZoneId required for TimestampTypes */
+ def toArrowType(dt: DataType, timeZoneId: String): ArrowType =
+ dt match {
+ case BooleanType => ArrowType.Bool.INSTANCE
+ case ByteType => new ArrowType.Int(8, true)
+ case ShortType => new ArrowType.Int(8 * 2, true)
+ case IntegerType => new ArrowType.Int(8 * 4, true)
+ case LongType => new ArrowType.Int(8 * 8, true)
+ case FloatType => new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
+ case DoubleType => new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)
+ case StringType => ArrowType.Utf8.INSTANCE
+ case BinaryType => ArrowType.Binary.INSTANCE
+ case DecimalType.Fixed(precision, scale) => new ArrowType.Decimal(precision, scale, 128)
+ case DateType => new ArrowType.Date(DateUnit.DAY)
+ case TimestampType =>
+ if (timeZoneId == null) {
+ throw new UnsupportedOperationException(
+ s"${TimestampType.catalogString} must supply timeZoneId parameter")
+ } else {
+ new ArrowType.Timestamp(TimeUnit.MICROSECOND, timeZoneId)
+ }
+ case TimestampNTZType =>
+ new ArrowType.Timestamp(TimeUnit.MICROSECOND, null)
+ case _ =>
+ throw new UnsupportedOperationException(s"Unsupported data type: ${dt.catalogString}")
+ }
+
+ /** Maps field from Spark to Arrow. NOTE: timeZoneId required for TimestampType */
+ def toArrowField(name: String, dt: DataType, nullable: Boolean, timeZoneId: String): Field = {
+ dt match {
+ case ArrayType(elementType, containsNull) =>
+ val fieldType = new FieldType(nullable, ArrowType.List.INSTANCE, null)
+ new Field(
+ name,
+ fieldType,
+ Seq(toArrowField("element", elementType, containsNull, timeZoneId)).asJava)
+ case StructType(fields) =>
+ val fieldType = new FieldType(nullable, ArrowType.Struct.INSTANCE, null)
+ new Field(
+ name,
+ fieldType,
+ fields
+ .map { field =>
+ toArrowField(field.name, field.dataType, field.nullable, timeZoneId)
+ }
+ .toSeq
+ .asJava)
+ case MapType(keyType, valueType, valueContainsNull) =>
+ val mapType = new FieldType(nullable, new ArrowType.Map(false), null)
+        // Note: the MapType entries struct cannot be null, and the StructType key field cannot be null
+ new Field(
+ name,
+ mapType,
+ Seq(
+ toArrowField(
+ MapVector.DATA_VECTOR_NAME,
+ new StructType()
+ .add(MapVector.KEY_NAME, keyType, nullable = false)
+ .add(MapVector.VALUE_NAME, valueType, nullable = valueContainsNull),
+ nullable = false,
+ timeZoneId)).asJava)
+ case dataType =>
+ val fieldType = new FieldType(nullable, toArrowType(dataType, timeZoneId), null)
+ new Field(name, fieldType, Seq.empty[Field].asJava)
+ }
+ }
+
+ /**
+ * Maps schema from Spark to Arrow. NOTE: timeZoneId required for TimestampType in StructType
+ */
+ def toArrowSchema(schema: StructType, timeZoneId: String): Schema = {
+ new Schema(schema.map { field =>
+ toArrowField(field.name, field.dataType, field.nullable, timeZoneId)
+ }.asJava)
+ }
+}
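A hedged round-trip sketch of the Spark-to-Arrow mappings above (illustrative schema; timeZoneId only matters for TimestampType fields):

import scala.collection.JavaConverters._
import org.apache.spark.sql.types._

val sparkSchema = StructType(Seq(
  StructField("id", LongType, nullable = false),
  StructField("ts", TimestampType, nullable = true)))

val arrowSchema = Utils.toArrowSchema(sparkSchema, "UTC")
// Converting each Arrow field back recovers the original Catalyst types.
val roundTripped = StructType(arrowSchema.getFields.asScala.map { f =>
  StructField(f.getName, Utils.fromArrowField(f), f.isNullable)
}.toSeq)
assert(roundTripped == sparkSchema)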
diff --git a/common/src/test/java/org/apache/comet/parquet/TestColumnReader.java b/common/src/test/java/org/apache/comet/parquet/TestColumnReader.java
new file mode 100644
index 000000000..d4e748a9b
--- /dev/null
+++ b/common/src/test/java/org/apache/comet/parquet/TestColumnReader.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.function.BiFunction;
+
+import scala.collection.JavaConverters;
+
+import org.junit.Test;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
+import org.apache.spark.sql.types.*;
+import org.apache.spark.sql.vectorized.ColumnVector;
+
+import org.apache.comet.vector.CometPlainVector;
+import org.apache.comet.vector.CometVector;
+
+import static org.apache.spark.sql.types.DataTypes.*;
+import static org.junit.Assert.*;
+
+@SuppressWarnings("unchecked")
+public class TestColumnReader {
+ private static final int BATCH_SIZE = 1024;
+  private static final List<DataType> TYPES =
+ Arrays.asList(
+ BooleanType,
+ ByteType,
+ ShortType,
+ IntegerType,
+ LongType,
+ FloatType,
+ DoubleType,
+ BinaryType,
+ DecimalType.apply(5, 2),
+ DecimalType.apply(18, 10),
+ DecimalType.apply(19, 5));
+  private static final List<Object> VALUES =
+ Arrays.asList(
+ true,
+ (byte) 42,
+ (short) 100,
+ 1000,
+ (long) 10000,
+ (float) 3.14,
+ 3.1415926,
+ new byte[] {1, 2, 3, 4, 5, 6, 7, 8},
+ Decimal.apply("123.45"),
+ Decimal.apply("00.0123456789"),
+ Decimal.apply("-001234.56789"));
+  private static final List<BiFunction<ColumnVector, Integer, Object>> GETTERS =
+ Arrays.asList(
+ ColumnVector::getBoolean,
+ ColumnVector::getByte,
+ ColumnVector::getShort,
+ ColumnVector::getInt,
+ ColumnVector::getLong,
+ ColumnVector::getFloat,
+ ColumnVector::getDouble,
+ ColumnVector::getBinary,
+ (v, i) -> v.getDecimal(i, 5, 2),
+ (v, i) -> v.getDecimal(i, 18, 10),
+ (v, i) -> v.getDecimal(i, 19, 5));
+
+ @Test
+ public void testConstantVectors() {
+ for (int i = 0; i < TYPES.size(); i++) {
+ DataType type = TYPES.get(i);
+ StructField field = StructField.apply("f", type, false, null);
+
+      List<Object> values = Collections.singletonList(VALUES.get(i));
+ InternalRow row = GenericInternalRow.apply(JavaConverters.asScalaBuffer(values).toSeq());
+ ConstantColumnReader reader = new ConstantColumnReader(field, BATCH_SIZE, row, 0, true);
+ reader.readBatch(BATCH_SIZE);
+ CometVector vector = reader.currentBatch();
+ assertEquals(BATCH_SIZE, vector.numValues());
+ assertEquals(0, vector.numNulls());
+ for (int j = 0; j < BATCH_SIZE; j++) {
+ if (TYPES.get(i) == BinaryType || TYPES.get(i) == StringType) {
+ assertArrayEquals((byte[]) VALUES.get(i), (byte[]) GETTERS.get(i).apply(vector, j));
+ } else {
+ assertEquals(VALUES.get(i), GETTERS.get(i).apply(vector, j));
+ }
+ }
+
+ // Test null values too
+ row.setNullAt(0);
+ reader = new ConstantColumnReader(field, BATCH_SIZE, row, 0, true);
+ reader.readBatch(BATCH_SIZE);
+ vector = reader.currentBatch();
+ assertEquals(BATCH_SIZE, vector.numValues());
+ assertEquals(BATCH_SIZE, vector.numNulls());
+ for (int j = 0; j < BATCH_SIZE; j++) {
+ assertTrue(vector.isNullAt(j));
+ }
+ }
+
+ if (org.apache.spark.package$.MODULE$.SPARK_VERSION_SHORT().compareTo("3.4") >= 0) {
+ Metadata meta = new MetadataBuilder().putString("EXISTS_DEFAULT", "123").build();
+ StructField field = StructField.apply("f", LongType, false, meta);
+ ConstantColumnReader reader = new ConstantColumnReader(field, BATCH_SIZE, true);
+ reader.readBatch(BATCH_SIZE);
+ CometVector vector = reader.currentBatch();
+
+ assertEquals(BATCH_SIZE, vector.numValues());
+ assertEquals(0, vector.numNulls());
+ for (int j = 0; j < BATCH_SIZE; j++) {
+ assertEquals(123, vector.getLong(j));
+ }
+ }
+ }
+
+ @Test
+ public void testRowIndexColumnVectors() {
+ StructField field = StructField.apply("f", LongType, false, null);
+ int bigBatchSize = BATCH_SIZE * 2;
+ int step = 4;
+ int batchSize = bigBatchSize / step;
+ long[] indices = new long[step * 2];
+    List<Long> expected = new ArrayList<>();
+
+ long idx = 0, len = 0;
+ for (int i = 0; i < step; i++) {
+ idx = ThreadLocalRandom.current().nextLong(idx + len, Long.MAX_VALUE);
+ indices[i * 2] = idx;
+ len = ThreadLocalRandom.current().nextLong(Long.max(bigBatchSize - expected.size(), 0));
+ indices[i * 2 + 1] = len;
+ for (int j = 0; j < len; j++) {
+ expected.add(idx + j);
+ }
+ }
+
+ RowIndexColumnReader reader = new RowIndexColumnReader(field, BATCH_SIZE, indices);
+ for (int i = 0; i < step; i++) {
+ reader.readBatch(batchSize);
+ CometVector vector = reader.currentBatch();
+ assertEquals(
+ Integer.min(batchSize, Integer.max(expected.size() - i * batchSize, 0)),
+ vector.numValues());
+ assertEquals(0, vector.numNulls());
+ for (int j = 0; j < vector.numValues(); j++) {
+ assertEquals((long) expected.get(i * batchSize + j), vector.getLong(j));
+ }
+ }
+
+ reader.close();
+ }
+
+ @Test
+ public void testIsFixedLength() {
+ BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+
+ ValueVector vv = new IntVector("v1", allocator);
+ CometVector vector = new CometPlainVector(vv, false);
+ assertTrue(vector.isFixedLength());
+
+ vv = new FixedSizeBinaryVector("v2", allocator, 12);
+ vector = new CometPlainVector(vv, false);
+ assertTrue(vector.isFixedLength());
+
+ vv = new VarBinaryVector("v3", allocator);
+ vector = new CometPlainVector(vv, false);
+ assertFalse(vector.isFixedLength());
+ }
+}
diff --git a/common/src/test/java/org/apache/comet/parquet/TestCometInputFile.java b/common/src/test/java/org/apache/comet/parquet/TestCometInputFile.java
new file mode 100644
index 000000000..63bb65d5c
--- /dev/null
+++ b/common/src/test/java/org/apache/comet/parquet/TestCometInputFile.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestCometInputFile {
+ @Test
+ public void testIsAtLeastHadoop33() {
+ Assert.assertTrue(CometInputFile.isAtLeastHadoop33("3.3.0"));
+ Assert.assertTrue(CometInputFile.isAtLeastHadoop33("3.4.0-SNAPSHOT"));
+ Assert.assertTrue(CometInputFile.isAtLeastHadoop33("3.12.5"));
+ Assert.assertTrue(CometInputFile.isAtLeastHadoop33("3.20.6.4-xyz"));
+
+ Assert.assertFalse(CometInputFile.isAtLeastHadoop33("2.7.2"));
+ Assert.assertFalse(CometInputFile.isAtLeastHadoop33("2.7.3-SNAPSHOT"));
+ Assert.assertFalse(CometInputFile.isAtLeastHadoop33("2.7"));
+ Assert.assertFalse(CometInputFile.isAtLeastHadoop33("2"));
+ Assert.assertFalse(CometInputFile.isAtLeastHadoop33("3"));
+ Assert.assertFalse(CometInputFile.isAtLeastHadoop33("3.2"));
+ Assert.assertFalse(CometInputFile.isAtLeastHadoop33("3.0.2.5-abc"));
+ Assert.assertFalse(CometInputFile.isAtLeastHadoop33("3.1.2-test"));
+ Assert.assertFalse(CometInputFile.isAtLeastHadoop33("3-SNAPSHOT"));
+ Assert.assertFalse(CometInputFile.isAtLeastHadoop33("3.2-SNAPSHOT"));
+ }
+}
diff --git a/common/src/test/java/org/apache/comet/parquet/TestFileReader.java b/common/src/test/java/org/apache/comet/parquet/TestFileReader.java
new file mode 100644
index 000000000..6e73f7510
--- /dev/null
+++ b/common/src/test/java/org/apache/comet/parquet/TestFileReader.java
@@ -0,0 +1,829 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.HadoopReadOptions;
+import org.apache.parquet.ParquetReadOptions;
+import org.apache.parquet.bytes.BytesInput;
+import org.apache.parquet.bytes.BytesUtils;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.Encoding;
+import org.apache.parquet.column.page.DataPage;
+import org.apache.parquet.column.page.DataPageV1;
+import org.apache.parquet.column.page.DataPageV2;
+import org.apache.parquet.column.page.DictionaryPage;
+import org.apache.parquet.column.page.PageReadStore;
+import org.apache.parquet.column.page.PageReader;
+import org.apache.parquet.column.statistics.BinaryStatistics;
+import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.column.values.bloomfilter.BlockSplitBloomFilter;
+import org.apache.parquet.column.values.bloomfilter.BloomFilter;
+import org.apache.parquet.filter2.predicate.FilterApi;
+import org.apache.parquet.filter2.predicate.FilterPredicate;
+import org.apache.parquet.filter2.predicate.Operators;
+import org.apache.parquet.hadoop.ParquetFileWriter;
+import org.apache.parquet.hadoop.ParquetInputFormat;
+import org.apache.parquet.hadoop.metadata.*;
+import org.apache.parquet.hadoop.util.HadoopInputFile;
+import org.apache.parquet.internal.column.columnindex.BoundaryOrder;
+import org.apache.parquet.internal.column.columnindex.ColumnIndex;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+import org.apache.parquet.io.InputFile;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.MessageTypeParser;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Types;
+
+import static org.apache.parquet.column.Encoding.*;
+import static org.apache.parquet.format.converter.ParquetMetadataConverter.MAX_STATS_SIZE;
+import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+
+@SuppressWarnings("deprecation")
+public class TestFileReader {
+ private static final MessageType SCHEMA =
+ MessageTypeParser.parseMessageType(
+ ""
+ + "message m {"
+ + " required group a {"
+ + " required binary b;"
+ + " }"
+ + " required group c {"
+ + " required int64 d;"
+ + " }"
+ + "}");
+
+ private static final MessageType SCHEMA2 =
+ MessageTypeParser.parseMessageType(
+ ""
+ + "message root { "
+ + "required int32 id;"
+ + "required binary name(UTF8); "
+ + "required int32 num; "
+ + "required binary comment(UTF8);"
+ + "}");
+
+ private static final MessageType PROJECTED_SCHEMA2 =
+ MessageTypeParser.parseMessageType(
+ ""
+ + "message root { "
+ + "required int32 id;"
+ + "required binary name(UTF8); "
+ + "required binary comment(UTF8);"
+ + "}");
+
+ private static final String[] PATH1 = {"a", "b"};
+ private static final ColumnDescriptor C1 = SCHEMA.getColumnDescription(PATH1);
+ private static final String[] PATH2 = {"c", "d"};
+ private static final ColumnDescriptor C2 = SCHEMA.getColumnDescription(PATH2);
+
+ private static final byte[] BYTES1 = {0, 1, 2, 3};
+ private static final byte[] BYTES2 = {1, 2, 3, 4};
+ private static final byte[] BYTES3 = {2, 3, 4, 5};
+ private static final byte[] BYTES4 = {3, 4, 5, 6};
+ private static final CompressionCodecName CODEC = CompressionCodecName.UNCOMPRESSED;
+
+ private static final org.apache.parquet.column.statistics.Statistics<?> EMPTY_STATS =
+ org.apache.parquet.column.statistics.Statistics.getBuilderForReading(
+ Types.required(PrimitiveTypeName.BINARY).named("test_binary"))
+ .build();
+
+ @Rule public final TemporaryFolder temp = new TemporaryFolder();
+
+ @Test
+ public void testEnableReadParallel() {
+ Configuration configuration = new Configuration();
+ ReadOptions options = ReadOptions.builder(configuration).build();
+
+ assertFalse(FileReader.shouldReadParallel(options, "hdfs"));
+ assertFalse(FileReader.shouldReadParallel(options, "file"));
+ assertFalse(FileReader.shouldReadParallel(options, null));
+ assertTrue(FileReader.shouldReadParallel(options, "s3a"));
+
+ options = ReadOptions.builder(configuration).enableParallelIO(false).build();
+ assertFalse(FileReader.shouldReadParallel(options, "s3a"));
+ }
+
+ @Test
+ public void testReadWrite() throws Exception {
+ File testFile = temp.newFile();
+ testFile.delete();
+
+ Path path = new Path(testFile.toURI());
+ Configuration configuration = new Configuration();
+
+ // Start a Parquet file with 2 row groups, each with 2 column chunks
+ ParquetFileWriter w = new ParquetFileWriter(configuration, SCHEMA, path);
+ w.start();
+ w.startBlock(3);
+ w.startColumn(C1, 5, CODEC);
+ long c1Starts = w.getPos();
+ long c1p1Starts = w.getPos();
+ w.writeDataPage(2, 4, BytesInput.from(BYTES1), EMPTY_STATS, 2, RLE, RLE, PLAIN);
+ w.writeDataPage(3, 4, BytesInput.from(BYTES1), EMPTY_STATS, 3, RLE, RLE, PLAIN);
+ w.endColumn();
+ long c1Ends = w.getPos();
+ w.startColumn(C2, 6, CODEC);
+ long c2Starts = w.getPos();
+ w.writeDictionaryPage(new DictionaryPage(BytesInput.from(BYTES2), 4, RLE_DICTIONARY));
+ long c2p1Starts = w.getPos();
+ w.writeDataPage(2, 4, BytesInput.from(BYTES2), EMPTY_STATS, 2, RLE, RLE, PLAIN);
+ w.writeDataPage(3, 4, BytesInput.from(BYTES2), EMPTY_STATS, 3, RLE, RLE, PLAIN);
+ w.writeDataPage(1, 4, BytesInput.from(BYTES2), EMPTY_STATS, 1, RLE, RLE, PLAIN);
+ w.endColumn();
+ long c2Ends = w.getPos();
+ w.endBlock();
+ w.startBlock(4);
+ w.startColumn(C1, 7, CODEC);
+ w.writeDataPage(7, 4, BytesInput.from(BYTES3), EMPTY_STATS, 7, RLE, RLE, PLAIN);
+ w.endColumn();
+ w.startColumn(C2, 8, CODEC);
+ w.writeDataPage(8, 4, BytesInput.from(BYTES4), EMPTY_STATS, 8, RLE, RLE, PLAIN);
+ w.endColumn();
+ w.endBlock();
+ w.end(new HashMap<>());
+
+ InputFile file = HadoopInputFile.fromPath(path, configuration);
+ ParquetReadOptions options = ParquetReadOptions.builder().build();
+ ReadOptions cometOptions = ReadOptions.builder(configuration).build();
+
+ try (FileReader reader = new FileReader(file, options, cometOptions)) {
+ ParquetMetadata readFooter = reader.getFooter();
+ assertEquals("footer: " + readFooter, 2, readFooter.getBlocks().size());
+ BlockMetaData rowGroup = readFooter.getBlocks().get(0);
+ assertEquals(c1Ends - c1Starts, rowGroup.getColumns().get(0).getTotalSize());
+ assertEquals(c2Ends - c2Starts, rowGroup.getColumns().get(1).getTotalSize());
+ assertEquals(c2Ends - c1Starts, rowGroup.getTotalByteSize());
+
+ assertEquals(c1Starts, rowGroup.getColumns().get(0).getStartingPos());
+ assertEquals(0, rowGroup.getColumns().get(0).getDictionaryPageOffset());
+ assertEquals(c1p1Starts, rowGroup.getColumns().get(0).getFirstDataPageOffset());
+ assertEquals(c2Starts, rowGroup.getColumns().get(1).getStartingPos());
+ assertEquals(c2Starts, rowGroup.getColumns().get(1).getDictionaryPageOffset());
+ assertEquals(c2p1Starts, rowGroup.getColumns().get(1).getFirstDataPageOffset());
+
+ HashSet<Encoding> expectedEncoding = new HashSet<>();
+ expectedEncoding.add(PLAIN);
+ expectedEncoding.add(RLE);
+ assertEquals(expectedEncoding, rowGroup.getColumns().get(0).getEncodings());
+ }
+
+ // read first block of col #1
+ try (FileReader r = new FileReader(file, options, cometOptions)) {
+ r.setRequestedSchema(Arrays.asList(SCHEMA.getColumnDescription(PATH1)));
+ PageReadStore pages = r.readNextRowGroup();
+ assertEquals(3, pages.getRowCount());
+ validateContains(pages, PATH1, 2, BytesInput.from(BYTES1));
+ validateContains(pages, PATH1, 3, BytesInput.from(BYTES1));
+ assertTrue(r.skipNextRowGroup());
+ assertNull(r.readNextRowGroup());
+ }
+
+ // read all blocks of col #1 and #2
+ try (FileReader r = new FileReader(file, options, cometOptions)) {
+ r.setRequestedSchema(
+ Arrays.asList(SCHEMA.getColumnDescription(PATH1), SCHEMA.getColumnDescription(PATH2)));
+ PageReadStore pages = r.readNextRowGroup();
+ assertEquals(3, pages.getRowCount());
+ validateContains(pages, PATH1, 2, BytesInput.from(BYTES1));
+ validateContains(pages, PATH1, 3, BytesInput.from(BYTES1));
+ validateContains(pages, PATH2, 2, BytesInput.from(BYTES2));
+ validateContains(pages, PATH2, 3, BytesInput.from(BYTES2));
+ validateContains(pages, PATH2, 1, BytesInput.from(BYTES2));
+
+ pages = r.readNextRowGroup();
+ assertEquals(4, pages.getRowCount());
+
+ validateContains(pages, PATH1, 7, BytesInput.from(BYTES3));
+ validateContains(pages, PATH2, 8, BytesInput.from(BYTES4));
+
+ assertNull(r.readNextRowGroup());
+ }
+ }
+
+ @Test
+ public void testBloomFilterReadWrite() throws Exception {
+ MessageType schema =
+ MessageTypeParser.parseMessageType("message test { required binary foo; }");
+ File testFile = temp.newFile();
+ testFile.delete();
+ Path path = new Path(testFile.toURI());
+ Configuration configuration = new Configuration();
+ configuration.set("parquet.bloom.filter.column.names", "foo");
+ String[] colPath = {"foo"};
+
+ ColumnDescriptor col = schema.getColumnDescription(colPath);
+ BinaryStatistics stats1 = new BinaryStatistics();
+ ParquetFileWriter w = new ParquetFileWriter(configuration, schema, path);
+ w.start();
+ w.startBlock(3);
+ w.startColumn(col, 5, CODEC);
+ w.writeDataPage(2, 4, BytesInput.from(BYTES1), stats1, 2, RLE, RLE, PLAIN);
+ w.writeDataPage(3, 4, BytesInput.from(BYTES1), stats1, 2, RLE, RLE, PLAIN);
+ w.endColumn();
+ BloomFilter blockSplitBloomFilter = new BlockSplitBloomFilter(0);
+ blockSplitBloomFilter.insertHash(blockSplitBloomFilter.hash(Binary.fromString("hello")));
+ blockSplitBloomFilter.insertHash(blockSplitBloomFilter.hash(Binary.fromString("world")));
+ addBloomFilter(w, "foo", blockSplitBloomFilter);
+ w.endBlock();
+ w.end(new HashMap<>());
+
+ InputFile file = HadoopInputFile.fromPath(path, configuration);
+ ParquetReadOptions options = ParquetReadOptions.builder().build();
+ ReadOptions cometOptions = ReadOptions.builder(configuration).build();
+
+ try (FileReader r = new FileReader(file, options, cometOptions)) {
+ ParquetMetadata footer = r.getFooter();
+ r.setRequestedSchema(Arrays.asList(schema.getColumnDescription(colPath)));
+ BloomFilterReader bloomFilterReader =
+ new BloomFilterReader(
+ footer.getBlocks().get(0),
+ r.getFileMetaData().getFileDecryptor(),
+ r.getInputStream());
+ BloomFilter bloomFilter =
+ bloomFilterReader.readBloomFilter(footer.getBlocks().get(0).getColumns().get(0));
+ assertTrue(bloomFilter.findHash(blockSplitBloomFilter.hash(Binary.fromString("hello"))));
+ assertTrue(bloomFilter.findHash(blockSplitBloomFilter.hash(Binary.fromString("world"))));
+ }
+ }
+
+ @Test
+ public void testReadWriteDataPageV2() throws Exception {
+ File testFile = temp.newFile();
+ testFile.delete();
+
+ Path path = new Path(testFile.toURI());
+ Configuration configuration = new Configuration();
+
+ ParquetFileWriter w = new ParquetFileWriter(configuration, SCHEMA, path);
+ w.start();
+ w.startBlock(14);
+
+ BytesInput repLevels = BytesInput.fromInt(2);
+ BytesInput defLevels = BytesInput.fromInt(1);
+ BytesInput data = BytesInput.fromInt(3);
+ BytesInput data2 = BytesInput.fromInt(10);
+
+ org.apache.parquet.column.statistics.Statistics<?> statsC1P1 = createStatistics("s", "z", C1);
+ org.apache.parquet.column.statistics.Statistics<?> statsC1P2 = createStatistics("b", "d", C1);
+
+ w.startColumn(C1, 6, CODEC);
+ long c1Starts = w.getPos();
+ w.writeDataPageV2(4, 1, 3, repLevels, defLevels, PLAIN, data, 4, statsC1P1);
+ w.writeDataPageV2(3, 0, 3, repLevels, defLevels, PLAIN, data, 4, statsC1P2);
+ w.endColumn();
+ long c1Ends = w.getPos();
+
+ w.startColumn(C2, 5, CODEC);
+ long c2Starts = w.getPos();
+ w.writeDataPageV2(5, 2, 3, repLevels, defLevels, PLAIN, data2, 4, EMPTY_STATS);
+ w.writeDataPageV2(2, 0, 2, repLevels, defLevels, PLAIN, data2, 4, EMPTY_STATS);
+ w.endColumn();
+ long c2Ends = w.getPos();
+
+ w.endBlock();
+ w.end(new HashMap<>());
+
+ InputFile file = HadoopInputFile.fromPath(path, configuration);
+ ParquetReadOptions options = ParquetReadOptions.builder().build();
+ ReadOptions cometOptions = ReadOptions.builder(configuration).build();
+
+ try (FileReader reader = new FileReader(file, options, cometOptions)) {
+ ParquetMetadata footer = reader.getFooter();
+ assertEquals("footer: " + footer, 1, footer.getBlocks().size());
+ assertEquals(c1Ends - c1Starts, footer.getBlocks().get(0).getColumns().get(0).getTotalSize());
+ assertEquals(c2Ends - c2Starts, footer.getBlocks().get(0).getColumns().get(1).getTotalSize());
+ assertEquals(c2Ends - c1Starts, footer.getBlocks().get(0).getTotalByteSize());
+
+ // check for stats
+ org.apache.parquet.column.statistics.Statistics<?> expectedStats =
+ createStatistics("b", "z", C1);
+ assertStatsValuesEqual(
+ expectedStats, footer.getBlocks().get(0).getColumns().get(0).getStatistics());
+
+ HashSet<Encoding> expectedEncoding = new HashSet<>();
+ expectedEncoding.add(PLAIN);
+ assertEquals(expectedEncoding, footer.getBlocks().get(0).getColumns().get(0).getEncodings());
+ }
+
+ try (FileReader r = new FileReader(file, options, cometOptions)) {
+ r.setRequestedSchema(
+ Arrays.asList(SCHEMA.getColumnDescription(PATH1), SCHEMA.getColumnDescription(PATH2)));
+ PageReadStore pages = r.readNextRowGroup();
+ assertEquals(14, pages.getRowCount());
+ validateV2Page(
+ pages,
+ PATH1,
+ 3,
+ 4,
+ 1,
+ repLevels.toByteArray(),
+ defLevels.toByteArray(),
+ data.toByteArray(),
+ 12);
+ validateV2Page(
+ pages,
+ PATH1,
+ 3,
+ 3,
+ 0,
+ repLevels.toByteArray(),
+ defLevels.toByteArray(),
+ data.toByteArray(),
+ 12);
+ validateV2Page(
+ pages,
+ PATH2,
+ 3,
+ 5,
+ 2,
+ repLevels.toByteArray(),
+ defLevels.toByteArray(),
+ data2.toByteArray(),
+ 12);
+ validateV2Page(
+ pages,
+ PATH2,
+ 2,
+ 2,
+ 0,
+ repLevels.toByteArray(),
+ defLevels.toByteArray(),
+ data2.toByteArray(),
+ 12);
+ assertNull(r.readNextRowGroup());
+ }
+ }
+
+ @Test
+ public void testColumnIndexFilter() throws Exception {
+ File testFile = temp.newFile();
+ testFile.delete();
+
+ Path path = new Path(testFile.toURI());
+ Configuration configuration = new Configuration();
+
+ ParquetFileWriter w = new ParquetFileWriter(configuration, SCHEMA, path);
+
+ w.start();
+ w.startBlock(4);
+ w.startColumn(C1, 7, CODEC);
+ w.writeDataPage(2, 4, BytesInput.from(BYTES1), EMPTY_STATS, 2, RLE, RLE, PLAIN);
+ w.writeDataPage(2, 4, BytesInput.from(BYTES2), EMPTY_STATS, 2, RLE, RLE, PLAIN);
+ w.endColumn();
+ w.startColumn(C2, 8, CODEC);
+ // the first page contains one matching record
+ w.writeDataPage(1, 4, BytesInput.from(BYTES3), statsC2(2L), 1, RLE, RLE, PLAIN);
+ // all the records of the second page are larger than 2, so should be filtered out
+ w.writeDataPage(3, 4, BytesInput.from(BYTES4), statsC2(3L, 4L, 5L), 3, RLE, RLE, PLAIN);
+ w.endColumn();
+ w.endBlock();
+
+ w.startBlock(4);
+ w.startColumn(C1, 7, CODEC);
+ w.writeDataPage(2, 4, BytesInput.from(BYTES1), EMPTY_STATS, 2, RLE, RLE, PLAIN);
+ w.writeDataPage(2, 4, BytesInput.from(BYTES2), EMPTY_STATS, 2, RLE, RLE, PLAIN);
+ w.endColumn();
+ w.startColumn(C2, 8, CODEC);
+ // the first page should be filtered out
+ w.writeDataPage(1, 4, BytesInput.from(BYTES3), statsC2(4L), 1, RLE, RLE, PLAIN);
+ // the second page will be read since it contains a matching record
+ w.writeDataPage(3, 4, BytesInput.from(BYTES4), statsC2(0L, 1L, 3L), 3, RLE, RLE, PLAIN);
+ w.endColumn();
+ w.endBlock();
+
+ w.end(new HashMap<>());
+
+ // set a simple equality filter in the ParquetInputFormat
+ Operators.LongColumn c2 = FilterApi.longColumn("c.d");
+ FilterPredicate p = FilterApi.eq(c2, 2L);
+ ParquetInputFormat.setFilterPredicate(configuration, p);
+ InputFile file = HadoopInputFile.fromPath(path, configuration);
+ ParquetReadOptions options = HadoopReadOptions.builder(configuration).build();
+ ReadOptions cometOptions = ReadOptions.builder(configuration).build();
+
+ try (FileReader r = new FileReader(file, options, cometOptions)) {
+ assertEquals(4, r.getFilteredRecordCount());
+ PageReadStore readStore = r.readNextFilteredRowGroup();
+
+ PageReader c1Reader = readStore.getPageReader(C1);
+ List<DataPage> c1Pages = new ArrayList<>();
+ DataPage page;
+ while ((page = c1Reader.readPage()) != null) {
+ c1Pages.add(page);
+ }
+ // second page of c1 should be filtered out
+ assertEquals(1, c1Pages.size());
+ validatePage(c1Pages.get(0), 2, BytesInput.from(BYTES1));
+
+ PageReader c2Reader = readStore.getPageReader(C2);
+ List<DataPage> c2Pages = new ArrayList<>();
+ while ((page = c2Reader.readPage()) != null) {
+ c2Pages.add(page);
+ }
+ assertEquals(1, c2Pages.size());
+ validatePage(c2Pages.get(0), 1, BytesInput.from(BYTES3));
+
+ // test the second row group
+ readStore = r.readNextFilteredRowGroup();
+ assertNotNull(readStore);
+
+ c1Reader = readStore.getPageReader(C1);
+ c1Pages.clear();
+ while ((page = c1Reader.readPage()) != null) {
+ c1Pages.add(page);
+ }
+ // all pages of c1 should be retained
+ assertEquals(2, c1Pages.size());
+ validatePage(c1Pages.get(0), 2, BytesInput.from(BYTES1));
+ validatePage(c1Pages.get(1), 2, BytesInput.from(BYTES2));
+
+ c2Reader = readStore.getPageReader(C2);
+ c2Pages.clear();
+ while ((page = c2Reader.readPage()) != null) {
+ c2Pages.add(page);
+ }
+ assertEquals(1, c2Pages.size());
+ validatePage(c2Pages.get(0), 3, BytesInput.from(BYTES4));
+ }
+ }
+
+ @Test
+ public void testColumnIndexReadWrite() throws Exception {
+ File testFile = temp.newFile();
+ testFile.delete();
+
+ Path path = new Path(testFile.toURI());
+ Configuration configuration = new Configuration();
+
+ ParquetFileWriter w = new ParquetFileWriter(configuration, SCHEMA, path);
+ w.start();
+ w.startBlock(4);
+ w.startColumn(C1, 7, CODEC);
+ w.writeDataPage(7, 4, BytesInput.from(BYTES3), EMPTY_STATS, RLE, RLE, PLAIN);
+ w.endColumn();
+ w.startColumn(C2, 8, CODEC);
+ w.writeDataPage(8, 4, BytesInput.from(BYTES4), EMPTY_STATS, RLE, RLE, PLAIN);
+ w.endColumn();
+ w.endBlock();
+ w.startBlock(4);
+ w.startColumn(C1, 5, CODEC);
+ long c1p1Starts = w.getPos();
+ w.writeDataPage(
+ 2, 4, BytesInput.from(BYTES1), statsC1(null, Binary.fromString("aaa")), 1, RLE, RLE, PLAIN);
+ long c1p2Starts = w.getPos();
+ w.writeDataPage(
+ 3,
+ 4,
+ BytesInput.from(BYTES1),
+ statsC1(Binary.fromString("bbb"), Binary.fromString("ccc")),
+ 3,
+ RLE,
+ RLE,
+ PLAIN);
+ w.endColumn();
+ long c1Ends = w.getPos();
+ w.startColumn(C2, 6, CODEC);
+ long c2p1Starts = w.getPos();
+ w.writeDataPage(2, 4, BytesInput.from(BYTES2), statsC2(117L, 100L), 1, RLE, RLE, PLAIN);
+ long c2p2Starts = w.getPos();
+ w.writeDataPage(3, 4, BytesInput.from(BYTES2), statsC2(null, null, null), 2, RLE, RLE, PLAIN);
+ long c2p3Starts = w.getPos();
+ w.writeDataPage(1, 4, BytesInput.from(BYTES2), statsC2(0L), 1, RLE, RLE, PLAIN);
+ w.endColumn();
+ long c2Ends = w.getPos();
+ w.endBlock();
+ w.startBlock(4);
+ w.startColumn(C1, 7, CODEC);
+ w.writeDataPage(
+ 7,
+ 4,
+ BytesInput.from(BYTES3),
+ // Creating huge stats so the column index will reach the limit and won't be written
+ statsC1(
+ Binary.fromConstantByteArray(new byte[(int) MAX_STATS_SIZE]),
+ Binary.fromConstantByteArray(new byte[1])),
+ 4,
+ RLE,
+ RLE,
+ PLAIN);
+ w.endColumn();
+ w.startColumn(C2, 8, CODEC);
+ w.writeDataPage(8, 4, BytesInput.from(BYTES4), EMPTY_STATS, RLE, RLE, PLAIN);
+ w.endColumn();
+ w.endBlock();
+ w.end(new HashMap<>());
+
+ InputFile file = HadoopInputFile.fromPath(path, configuration);
+ ParquetReadOptions options = ParquetReadOptions.builder().build();
+ ReadOptions cometOptions = ReadOptions.builder(configuration).build();
+
+ try (FileReader reader = new FileReader(file, options, cometOptions)) {
+ ParquetMetadata footer = reader.getFooter();
+ assertEquals(3, footer.getBlocks().size());
+ BlockMetaData blockMeta = footer.getBlocks().get(1);
+ assertEquals(2, blockMeta.getColumns().size());
+
+ ColumnIndexReader indexReader = reader.getColumnIndexReader(1);
+ ColumnIndex columnIndex = indexReader.readColumnIndex(blockMeta.getColumns().get(0));
+ assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
+ assertEquals(Arrays.asList(1L, 0L), columnIndex.getNullCounts());
+ assertEquals(Arrays.asList(false, false), columnIndex.getNullPages());
+ List<ByteBuffer> minValues = columnIndex.getMinValues();
+ assertEquals(2, minValues.size());
+ List<ByteBuffer> maxValues = columnIndex.getMaxValues();
+ assertEquals(2, maxValues.size());
+ assertEquals("aaa", new String(minValues.get(0).array(), StandardCharsets.UTF_8));
+ assertEquals("aaa", new String(maxValues.get(0).array(), StandardCharsets.UTF_8));
+ assertEquals("bbb", new String(minValues.get(1).array(), StandardCharsets.UTF_8));
+ assertEquals("ccc", new String(maxValues.get(1).array(), StandardCharsets.UTF_8));
+
+ columnIndex = indexReader.readColumnIndex(blockMeta.getColumns().get(1));
+ assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
+ assertEquals(Arrays.asList(0L, 3L, 0L), columnIndex.getNullCounts());
+ assertEquals(Arrays.asList(false, true, false), columnIndex.getNullPages());
+ minValues = columnIndex.getMinValues();
+ assertEquals(3, minValues.size());
+ maxValues = columnIndex.getMaxValues();
+ assertEquals(3, maxValues.size());
+ assertEquals(100, BytesUtils.bytesToLong(minValues.get(0).array()));
+ assertEquals(117, BytesUtils.bytesToLong(maxValues.get(0).array()));
+ assertEquals(0, minValues.get(1).array().length);
+ assertEquals(0, maxValues.get(1).array().length);
+ assertEquals(0, BytesUtils.bytesToLong(minValues.get(2).array()));
+ assertEquals(0, BytesUtils.bytesToLong(maxValues.get(2).array()));
+
+ OffsetIndex offsetIndex = indexReader.readOffsetIndex(blockMeta.getColumns().get(0));
+ assertEquals(2, offsetIndex.getPageCount());
+ assertEquals(c1p1Starts, offsetIndex.getOffset(0));
+ assertEquals(c1p2Starts, offsetIndex.getOffset(1));
+ assertEquals(c1p2Starts - c1p1Starts, offsetIndex.getCompressedPageSize(0));
+ assertEquals(c1Ends - c1p2Starts, offsetIndex.getCompressedPageSize(1));
+ assertEquals(0, offsetIndex.getFirstRowIndex(0));
+ assertEquals(1, offsetIndex.getFirstRowIndex(1));
+
+ offsetIndex = indexReader.readOffsetIndex(blockMeta.getColumns().get(1));
+ assertEquals(3, offsetIndex.getPageCount());
+ assertEquals(c2p1Starts, offsetIndex.getOffset(0));
+ assertEquals(c2p2Starts, offsetIndex.getOffset(1));
+ assertEquals(c2p3Starts, offsetIndex.getOffset(2));
+ assertEquals(c2p2Starts - c2p1Starts, offsetIndex.getCompressedPageSize(0));
+ assertEquals(c2p3Starts - c2p2Starts, offsetIndex.getCompressedPageSize(1));
+ assertEquals(c2Ends - c2p3Starts, offsetIndex.getCompressedPageSize(2));
+ assertEquals(0, offsetIndex.getFirstRowIndex(0));
+ assertEquals(1, offsetIndex.getFirstRowIndex(1));
+ assertEquals(3, offsetIndex.getFirstRowIndex(2));
+
+ assertNull(indexReader.readColumnIndex(footer.getBlocks().get(2).getColumns().get(0)));
+ }
+ }
+
+ // Test reader with merging of scan ranges enabled
+ @Test
+ public void testWriteReadMergeScanRange() throws Throwable {
+ Configuration conf = new Configuration();
+ conf.set(ReadOptions.COMET_IO_MERGE_RANGES, Boolean.toString(true));
+ // Set the merge range delta so small that ranges do not get merged
+ conf.set(ReadOptions.COMET_IO_MERGE_RANGES_DELTA, Integer.toString(1024));
+ testReadWrite(conf, 2, 1024);
+ // Set the merge range delta so large that all ranges get merged
+ conf.set(ReadOptions.COMET_IO_MERGE_RANGES_DELTA, Integer.toString(1024 * 1024));
+ testReadWrite(conf, 2, 1024);
+ }
+
+ // `addBloomFilter` is package-private in Parquet, so this uses reflection to access it
+ private void addBloomFilter(ParquetFileWriter w, String s, BloomFilter filter) throws Exception {
+ Method method =
+ ParquetFileWriter.class.getDeclaredMethod(
+ "addBloomFilter", String.class, BloomFilter.class);
+ method.setAccessible(true);
+ method.invoke(w, s, filter);
+ }
+
+ private void validateContains(PageReadStore pages, String[] path, int values, BytesInput bytes)
+ throws IOException {
+ PageReader pageReader = pages.getPageReader(SCHEMA.getColumnDescription(path));
+ DataPage page = pageReader.readPage();
+ validatePage(page, values, bytes);
+ }
+
+ private void validatePage(DataPage page, int values, BytesInput bytes) throws IOException {
+ assertEquals(values, page.getValueCount());
+ assertArrayEquals(bytes.toByteArray(), ((DataPageV1) page).getBytes().toByteArray());
+ }
+
+ private void validateV2Page(
+ PageReadStore pages,
+ String[] path,
+ int values,
+ int rows,
+ int nullCount,
+ byte[] repetition,
+ byte[] definition,
+ byte[] data,
+ int uncompressedSize)
+ throws IOException {
+ PageReader pageReader = pages.getPageReader(SCHEMA.getColumnDescription(path));
+ DataPageV2 page = (DataPageV2) pageReader.readPage();
+ assertEquals(values, page.getValueCount());
+ assertEquals(rows, page.getRowCount());
+ assertEquals(nullCount, page.getNullCount());
+ assertEquals(uncompressedSize, page.getUncompressedSize());
+ assertArrayEquals(repetition, page.getRepetitionLevels().toByteArray());
+ assertArrayEquals(definition, page.getDefinitionLevels().toByteArray());
+ assertArrayEquals(data, page.getData().toByteArray());
+ }
+
+ private Statistics<?> createStatistics(String min, String max, ColumnDescriptor col) {
+ return Statistics.getBuilderForReading(col.getPrimitiveType())
+ .withMin(Binary.fromString(min).getBytes())
+ .withMax(Binary.fromString(max).getBytes())
+ .withNumNulls(0)
+ .build();
+ }
+
+ public static void assertStatsValuesEqual(Statistics<?> expected, Statistics<?> actual) {
+ if (expected == actual) {
+ return;
+ }
+ if (expected == null || actual == null) {
+ assertEquals(expected, actual);
+ }
+ Assert.assertArrayEquals(expected.getMaxBytes(), actual.getMaxBytes());
+ Assert.assertArrayEquals(expected.getMinBytes(), actual.getMinBytes());
+ Assert.assertEquals(expected.getNumNulls(), actual.getNumNulls());
+ }
+
+ private Statistics<?> statsC1(Binary... values) {
+ Statistics<?> stats = Statistics.createStats(C1.getPrimitiveType());
+ for (Binary value : values) {
+ if (value == null) {
+ stats.incrementNumNulls();
+ } else {
+ stats.updateStats(value);
+ }
+ }
+ return stats;
+ }
+
+ /**
+ * Generates arbitrary data for simple schemas, writes the data to a file and also returns the
+ * data.
+ *
+ * @return map from each column's dotted path to the data pages written for that column
+ */
+ private HashMap<String, byte[][]> generateAndWriteData(
+ Configuration configuration,
+ Path path,
+ MessageType schema,
+ int numPages,
+ int numRecordsPerPage)
+ throws IOException {
+
+ HashMap<String, byte[][]> dataPages = new HashMap<>();
+
+ Generator generator = new Generator();
+ ParquetFileWriter writer = new ParquetFileWriter(configuration, schema, path);
+ writer.start();
+ writer.startBlock((long) numPages * numRecordsPerPage);
+ for (ColumnDescriptor colDesc : schema.getColumns()) {
+ writer.startColumn(colDesc, (long) numPages * numRecordsPerPage, CODEC);
+ String type = colDesc.getPrimitiveType().getName();
+ byte[][] allPages = new byte[numPages][];
+ byte[] data;
+ for (int i = 0; i < numPages; i++) {
+ data = generator.generateValues(numRecordsPerPage, type);
+ writer.writeDataPage(
+ numRecordsPerPage,
+ data.length,
+ BytesInput.from(data),
+ EMPTY_STATS,
+ numRecordsPerPage,
+ RLE,
+ RLE,
+ PLAIN);
+ allPages[i] = data;
+ }
+ dataPages.put(String.join(".", colDesc.getPath()), allPages);
+ writer.endColumn();
+ }
+ writer.endBlock();
+ writer.end(new HashMap<>());
+ return dataPages;
+ }
+
+ private void readAndValidatePageData(
+ InputFile inputFile,
+ ParquetReadOptions options,
+ ReadOptions cometOptions,
+ MessageType schema,
+ HashMap<String, byte[][]> expected,
+ int expectedValuesPerPage)
+ throws IOException {
+ try (FileReader fileReader = new FileReader(inputFile, options, cometOptions)) {
+ fileReader.setRequestedSchema(schema.getColumns());
+ PageReadStore pages = fileReader.readNextRowGroup();
+ for (ColumnDescriptor colDesc : schema.getColumns()) {
+ byte[][] allExpectedPages = expected.get(String.join(".", colDesc.getPath()));
+ PageReader pageReader = pages.getPageReader(colDesc);
+ for (byte[] expectedPage : allExpectedPages) {
+ DataPage page = pageReader.readPage();
+ validatePage(page, expectedValuesPerPage, BytesInput.from(expectedPage));
+ }
+ }
+ }
+ }
+
+ public void testReadWrite(Configuration configuration, int numPages, int numRecordsPerPage)
+ throws Exception {
+ File testFile = temp.newFile();
+ testFile.delete();
+
+ Path path = new Path(testFile.toURI());
+ HashMap<String, byte[][]> dataPages =
+ generateAndWriteData(configuration, path, SCHEMA2, numPages, numRecordsPerPage);
+ InputFile file = HadoopInputFile.fromPath(path, configuration);
+ ParquetReadOptions options = ParquetReadOptions.builder().build();
+ ReadOptions cometOptions = ReadOptions.builder(configuration).build();
+
+ readAndValidatePageData(
+ file, options, cometOptions, PROJECTED_SCHEMA2, dataPages, numRecordsPerPage);
+ }
+
+ static class Generator {
+
+ static Random random = new Random(1729);
+ private static final String ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz -";
+ private static final int STR_MIN_SIZE = 5;
+ private static final int STR_MAX_SIZE = 30;
+
+ private byte[] getString(int minSize, int maxSize) {
+ int size = random.nextInt(maxSize - minSize) + minSize;
+ byte[] str = new byte[size];
+ for (int i = 0; i < size; ++i) {
+ str[i] = (byte) ALPHABET.charAt(random.nextInt(ALPHABET.length()));
+ }
+ return str;
+ }
+
+ private byte[] generateValues(int numValues, String type) throws IOException {
+
+ if (type.equals("int32")) {
+ byte[] data = new byte[4 * numValues];
+ random.nextBytes(data);
+ return data;
+ } else {
+ ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+ for (int i = 0; i < numValues; i++) {
+ outputStream.write(getString(STR_MIN_SIZE, STR_MAX_SIZE));
+ }
+ return outputStream.toByteArray();
+ }
+ }
+ }
+
+ private Statistics<?> statsC2(Long... values) {
+ Statistics<?> stats = Statistics.createStats(C2.getPrimitiveType());
+ for (Long value : values) {
+ if (value == null) {
+ stats.incrementNumNulls();
+ } else {
+ stats.updateStats(value);
+ }
+ }
+ return stats;
+ }
+}
diff --git a/common/src/test/resources/log4j.properties b/common/src/test/resources/log4j.properties
new file mode 100644
index 000000000..2f46ce155
--- /dev/null
+++ b/common/src/test/resources/log4j.properties
@@ -0,0 +1,36 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Set everything to be logged to the file target/unit-tests.log
+test.appender=file
+log4j.rootCategory=INFO, ${test.appender}
+log4j.appender.file=org.apache.log4j.FileAppender
+log4j.appender.file.append=true
+log4j.appender.file.file=target/unit-tests.log
+log4j.appender.file.layout=org.apache.log4j.PatternLayout
+log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
+
+# Tests that launch java subprocesses can set the "test.appender" system property to
+# "console" to avoid having the child process's logs overwrite the unit test's
+# log file.
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%t: %m%n
+
+# Ignore messages below warning level from Jetty, because it's a bit verbose
+log4j.logger.org.sparkproject.jetty=WARN
diff --git a/common/src/test/resources/log4j2.properties b/common/src/test/resources/log4j2.properties
new file mode 100644
index 000000000..04cdf8533
--- /dev/null
+++ b/common/src/test/resources/log4j2.properties
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Set everything to be logged to the file target/unit-tests.log
+rootLogger.level = info
+rootLogger.appenderRef.file.ref = ${sys:test.appender:-File}
+
+appender.file.type = File
+appender.file.name = File
+appender.file.fileName = target/unit-tests.log
+appender.file.layout.type = PatternLayout
+appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
+
+# Tests that launch java subprocesses can set the "test.appender" system property to
+# "console" to avoid having the child process's logs overwrite the unit test's
+# log file.
+appender.console.type = Console
+appender.console.name = console
+appender.console.target = SYSTEM_ERR
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %t: %m%n
+
+# Ignore messages below warning level from Jetty, because it's a bit verbose
+logger.jetty.name = org.sparkproject.jetty
+logger.jetty.level = warn
+
diff --git a/conf/log4rs.yaml b/conf/log4rs.yaml
new file mode 100644
index 000000000..43277918f
--- /dev/null
+++ b/conf/log4rs.yaml
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+appenders:
+ unittest:
+ kind: file
+ path: "target/unit-tests.log"
+
+root:
+ level: info
+ appenders:
+ - unittest
diff --git a/core/Cargo.lock b/core/Cargo.lock
new file mode 100644
index 000000000..0585d7ec7
--- /dev/null
+++ b/core/Cargo.lock
@@ -0,0 +1,3191 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "addr2line"
+version = "0.21.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
+dependencies = [
+ "gimli",
+]
+
+[[package]]
+name = "adler"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
+
+[[package]]
+name = "ahash"
+version = "0.8.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01"
+dependencies = [
+ "cfg-if",
+ "const-random",
+ "getrandom",
+ "once_cell",
+ "version_check",
+ "zerocopy",
+]
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "alloc-no-stdlib"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
+
+[[package]]
+name = "alloc-stdlib"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
+dependencies = [
+ "alloc-no-stdlib",
+]
+
+[[package]]
+name = "allocator-api2"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
+
+[[package]]
+name = "android-tzdata"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
+
+[[package]]
+name = "android_system_properties"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "anes"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
+
+[[package]]
+name = "anstyle"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87"
+
+[[package]]
+name = "anyhow"
+version = "1.0.79"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca"
+
+[[package]]
+name = "arc-swap"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6"
+
+[[package]]
+name = "arrayref"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545"
+
+[[package]]
+name = "arrayvec"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
+
+[[package]]
+name = "arrow"
+version = "49.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5bc25126d18a012146a888a0298f2c22e1150327bd2765fc76d710a556b2d614"
+dependencies = [
+ "ahash",
+ "arrow-arith",
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-cast",
+ "arrow-csv",
+ "arrow-data",
+ "arrow-ipc",
+ "arrow-json",
+ "arrow-ord",
+ "arrow-row",
+ "arrow-schema",
+ "arrow-select",
+ "arrow-string",
+]
+
+[[package]]
+name = "arrow-arith"
+version = "49.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34ccd45e217ffa6e53bbb0080990e77113bdd4e91ddb84e97b77649810bcf1a7"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "chrono",
+ "half 2.1.0",
+ "num",
+]
+
+[[package]]
+name = "arrow-array"
+version = "49.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6bda9acea48b25123c08340f3a8ac361aa0f74469bb36f5ee9acf923fce23e9d"
+dependencies = [
+ "ahash",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "chrono",
+ "chrono-tz",
+ "half 2.1.0",
+ "hashbrown 0.14.3",
+ "num",
+]
+
+[[package]]
+name = "arrow-buffer"
+version = "49.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01a0fc21915b00fc6c2667b069c1b64bdd920982f426079bc4a7cab86822886c"
+dependencies = [
+ "bytes",
+ "half 2.1.0",
+ "num",
+]
+
+[[package]]
+name = "arrow-cast"
+version = "49.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5dc0368ed618d509636c1e3cc20db1281148190a78f43519487b2daf07b63b4a"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "arrow-select",
+ "base64",
+ "chrono",
+ "comfy-table",
+ "half 2.1.0",
+ "lexical-core",
+ "num",
+]
+
+[[package]]
+name = "arrow-csv"
+version = "49.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e09aa6246a1d6459b3f14baeaa49606cfdbca34435c46320e14054d244987ca"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-cast",
+ "arrow-data",
+ "arrow-schema",
+ "chrono",
+ "csv",
+ "csv-core",
+ "lazy_static",
+ "lexical-core",
+ "regex",
+]
+
+[[package]]
+name = "arrow-data"
+version = "49.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "907fafe280a3874474678c1858b9ca4cb7fd83fb8034ff5b6d6376205a08c634"
+dependencies = [
+ "arrow-buffer",
+ "arrow-schema",
+ "half 2.1.0",
+ "num",
+]
+
+[[package]]
+name = "arrow-ipc"
+version = "49.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "79a43d6808411886b8c7d4f6f7dd477029c1e77ffffffb7923555cc6579639cd"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-cast",
+ "arrow-data",
+ "arrow-schema",
+ "flatbuffers",
+]
+
+[[package]]
+name = "arrow-json"
+version = "49.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d82565c91fd627922ebfe2810ee4e8346841b6f9361b87505a9acea38b614fee"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-cast",
+ "arrow-data",
+ "arrow-schema",
+ "chrono",
+ "half 2.1.0",
+ "indexmap 2.1.0",
+ "lexical-core",
+ "num",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "arrow-ord"
+version = "49.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b23b0e53c0db57c6749997fd343d4c0354c994be7eca67152dd2bdb9a3e1bb4"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "arrow-select",
+ "half 2.1.0",
+ "num",
+]
+
+[[package]]
+name = "arrow-row"
+version = "49.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "361249898d2d6d4a6eeb7484be6ac74977e48da12a4dd81a708d620cc558117a"
+dependencies = [
+ "ahash",
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "half 2.1.0",
+ "hashbrown 0.14.3",
+]
+
+[[package]]
+name = "arrow-schema"
+version = "49.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09e28a5e781bf1b0f981333684ad13f5901f4cd2f20589eab7cf1797da8fc167"
+dependencies = [
+ "bitflags 2.4.1",
+]
+
+[[package]]
+name = "arrow-select"
+version = "49.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4f6208466590960efc1d2a7172bc4ff18a67d6e25c529381d7f96ddaf0dc4036"
+dependencies = [
+ "ahash",
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "num",
+]
+
+[[package]]
+name = "arrow-string"
+version = "49.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4a48149c63c11c9ff571e50ab8f017d2a7cb71037a882b42f6354ed2da9acc7"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "arrow-select",
+ "num",
+ "regex",
+ "regex-syntax",
+]
+
+[[package]]
+name = "assertables"
+version = "7.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c24e9d990669fbd16806bff449e4ac644fd9b1fca014760087732fe4102f131"
+
+[[package]]
+name = "async-trait"
+version = "0.1.77"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.48",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+
+[[package]]
+name = "backtrace"
+version = "0.3.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837"
+dependencies = [
+ "addr2line",
+ "cc",
+ "cfg-if",
+ "libc",
+ "miniz_oxide",
+ "object",
+ "rustc-demangle",
+]
+
+[[package]]
+name = "base64"
+version = "0.21.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9"
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "bitflags"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
+
+[[package]]
+name = "blake2"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe"
+dependencies = [
+ "digest",
+]
+
+[[package]]
+name = "blake3"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0231f06152bf547e9c2b5194f247cd97aacf6dcd8b15d8e5ec0663f64580da87"
+dependencies = [
+ "arrayref",
+ "arrayvec",
+ "cc",
+ "cfg-if",
+ "constant_time_eq",
+]
+
+[[package]]
+name = "block-buffer"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
+dependencies = [
+ "generic-array",
+]
+
+[[package]]
+name = "brotli"
+version = "3.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+ "brotli-decompressor",
+]
+
+[[package]]
+name = "brotli-decompressor"
+version = "2.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+]
+
+[[package]]
+name = "bumpalo"
+version = "3.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec"
+
+[[package]]
+name = "bytemuck"
+version = "1.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6"
+
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
+[[package]]
+name = "bytes"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223"
+
+[[package]]
+name = "cast"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
+
+[[package]]
+name = "cc"
+version = "1.0.83"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
+dependencies = [
+ "jobserver",
+ "libc",
+]
+
+[[package]]
+name = "cesu8"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "chrono"
+version = "0.4.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38"
+dependencies = [
+ "android-tzdata",
+ "iana-time-zone",
+ "js-sys",
+ "num-traits",
+ "wasm-bindgen",
+ "windows-targets 0.48.5",
+]
+
+[[package]]
+name = "chrono-tz"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91d7b79e99bfaa0d47da0687c43aa3b7381938a62ad3a6498599039321f660b7"
+dependencies = [
+ "chrono",
+ "chrono-tz-build",
+ "phf",
+]
+
+[[package]]
+name = "chrono-tz-build"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f"
+dependencies = [
+ "parse-zoneinfo",
+ "phf",
+ "phf_codegen",
+]
+
+[[package]]
+name = "ciborium"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926"
+dependencies = [
+ "ciborium-io",
+ "ciborium-ll",
+ "serde",
+]
+
+[[package]]
+name = "ciborium-io"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656"
+
+[[package]]
+name = "ciborium-ll"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b"
+dependencies = [
+ "ciborium-io",
+ "half 1.8.2",
+]
+
+[[package]]
+name = "clap"
+version = "4.4.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52bdc885e4cacc7f7c9eedc1ef6da641603180c783c41a15c264944deeaab642"
+dependencies = [
+ "clap_builder",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fb7fb5e4e979aec3be7791562fcba452f94ad85e954da024396433e0e25a79e9"
+dependencies = [
+ "anstyle",
+ "clap_lex",
+]
+
+[[package]]
+name = "clap_lex"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
+
+[[package]]
+name = "combine"
+version = "4.6.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4"
+dependencies = [
+ "bytes",
+ "memchr",
+]
+
+[[package]]
+name = "comet"
+version = "0.1.0"
+dependencies = [
+ "ahash",
+ "arrow",
+ "arrow-array",
+ "arrow-data",
+ "arrow-schema",
+ "arrow-string",
+ "assertables",
+ "async-trait",
+ "brotli",
+ "byteorder",
+ "bytes",
+ "chrono",
+ "chrono-tz",
+ "crc32fast",
+ "criterion",
+ "datafusion",
+ "datafusion-common",
+ "datafusion-physical-expr",
+ "flate2",
+ "futures",
+ "half 2.1.0",
+ "hashbrown 0.14.3",
+ "itertools 0.11.0",
+ "jni",
+ "lazy_static",
+ "log",
+ "log4rs",
+ "lz4",
+ "mimalloc",
+ "num",
+ "once_cell",
+ "parking_lot",
+ "parquet",
+ "parquet-format",
+ "paste",
+ "pprof",
+ "prost 0.12.3",
+ "prost-build",
+ "rand",
+ "regex",
+ "serde",
+ "simd-adler32",
+ "snap",
+ "tempfile",
+ "thiserror",
+ "thrift 0.17.0",
+ "tokio",
+ "tokio-stream",
+ "unicode-segmentation",
+ "zstd",
+]
+
+[[package]]
+name = "comfy-table"
+version = "7.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c64043d6c7b7a4c58e39e7efccfdea7b93d885a795d0c054a69dbbf4dd52686"
+dependencies = [
+ "strum",
+ "strum_macros",
+ "unicode-width",
+]
+
+[[package]]
+name = "const-random"
+version = "0.1.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5aaf16c9c2c612020bcfd042e170f6e32de9b9d75adb5277cdbbd2e2c8c8299a"
+dependencies = [
+ "const-random-macro",
+]
+
+[[package]]
+name = "const-random-macro"
+version = "0.1.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e"
+dependencies = [
+ "getrandom",
+ "once_cell",
+ "tiny-keccak",
+]
+
+[[package]]
+name = "constant_time_eq"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2"
+
+[[package]]
+name = "core-foundation-sys"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
+
+[[package]]
+name = "cpp_demangle"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e8227005286ec39567949b33df9896bcadfa6051bccca2488129f108ca23119"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "cpufeatures"
+version = "0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "crc32fast"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "criterion"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
+dependencies = [
+ "anes",
+ "cast",
+ "ciborium",
+ "clap",
+ "criterion-plot",
+ "is-terminal",
+ "itertools 0.10.5",
+ "num-traits",
+ "once_cell",
+ "oorandom",
+ "plotters",
+ "rayon",
+ "regex",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "tinytemplate",
+ "walkdir",
+]
+
+[[package]]
+name = "criterion-plot"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
+dependencies = [
+ "cast",
+ "itertools 0.10.5",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fca89a0e215bab21874660c67903c5f143333cab1da83d041c7ded6053774751"
+dependencies = [
+ "cfg-if",
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0e3681d554572a651dda4186cd47240627c3d0114d45a95f6ad27f2f22e7548d"
+dependencies = [
+ "autocfg",
+ "cfg-if",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3a430a770ebd84726f584a90ee7f020d28db52c6d02138900f22341f866d39c"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "crunchy"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
+
+[[package]]
+name = "crypto-common"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
+dependencies = [
+ "generic-array",
+ "typenum",
+]
+
+[[package]]
+name = "csv"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe"
+dependencies = [
+ "csv-core",
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "csv-core"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "dashmap"
+version = "5.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
+dependencies = [
+ "cfg-if",
+ "hashbrown 0.14.3",
+ "lock_api",
+ "once_cell",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "datafusion"
+version = "34.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "193fd1e7628278d0641c5122860f9a7fd6a1d77d055838d12f55d15bbe28d4d0"
+dependencies = [
+ "ahash",
+ "arrow",
+ "arrow-array",
+ "arrow-schema",
+ "async-trait",
+ "bytes",
+ "chrono",
+ "dashmap",
+ "datafusion-common",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-optimizer",
+ "datafusion-physical-expr",
+ "datafusion-physical-plan",
+ "datafusion-sql",
+ "futures",
+ "glob",
+ "half 2.1.0",
+ "hashbrown 0.14.3",
+ "indexmap 2.1.0",
+ "itertools 0.12.0",
+ "log",
+ "num_cpus",
+ "object_store",
+ "parking_lot",
+ "pin-project-lite",
+ "rand",
+ "sqlparser",
+ "tempfile",
+ "tokio",
+ "tokio-util",
+ "url",
+ "uuid",
+]
+
+[[package]]
+name = "datafusion-common"
+version = "34.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "548bc49c4a489e3de474813831ea556dc9d368f9ed8d867b1493da42e8e9f613"
+dependencies = [
+ "ahash",
+ "arrow",
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-schema",
+ "chrono",
+ "half 2.1.0",
+ "libc",
+ "num_cpus",
+ "object_store",
+ "sqlparser",
+]
+
+[[package]]
+name = "datafusion-execution"
+version = "34.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ecc865657ffcf4da5ff08bdc6436a9a833bc0aa96c3254c8d18ab8a0ad4e437d"
+dependencies = [
+ "arrow",
+ "chrono",
+ "dashmap",
+ "datafusion-common",
+ "datafusion-expr",
+ "futures",
+ "hashbrown 0.14.3",
+ "log",
+ "object_store",
+ "parking_lot",
+ "rand",
+ "tempfile",
+ "url",
+]
+
+[[package]]
+name = "datafusion-expr"
+version = "34.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33c473f72d8d81a532e63f6e562ed66dd9209dfd8e433d9712abd42444ee161e"
+dependencies = [
+ "ahash",
+ "arrow",
+ "arrow-array",
+ "datafusion-common",
+ "paste",
+ "sqlparser",
+ "strum",
+ "strum_macros",
+]
+
+[[package]]
+name = "datafusion-optimizer"
+version = "34.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb6218318001d2f6783b7fffa17592318f65f26609d7aab605a3dd0c7c2e2618"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "chrono",
+ "datafusion-common",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "hashbrown 0.14.3",
+ "itertools 0.12.0",
+ "log",
+ "regex-syntax",
+]
+
+[[package]]
+name = "datafusion-physical-expr"
+version = "34.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e1ca7e35ca22f9dc506c2375b92054b03ccf91afe25c0a90b395a1473a09735"
+dependencies = [
+ "ahash",
+ "arrow",
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-ord",
+ "arrow-schema",
+ "base64",
+ "blake2",
+ "blake3",
+ "chrono",
+ "datafusion-common",
+ "datafusion-expr",
+ "half 2.1.0",
+ "hashbrown 0.14.3",
+ "hex",
+ "indexmap 2.1.0",
+ "itertools 0.12.0",
+ "log",
+ "md-5",
+ "paste",
+ "petgraph",
+ "rand",
+ "regex",
+ "sha2",
+ "unicode-segmentation",
+ "uuid",
+]
+
+[[package]]
+name = "datafusion-physical-plan"
+version = "34.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddde97adefcca3a55257c646ffee2a95b6cac66f74d1146a6e3a6dbb37830631"
+dependencies = [
+ "ahash",
+ "arrow",
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-schema",
+ "async-trait",
+ "chrono",
+ "datafusion-common",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "futures",
+ "half 2.1.0",
+ "hashbrown 0.14.3",
+ "indexmap 2.1.0",
+ "itertools 0.12.0",
+ "log",
+ "once_cell",
+ "parking_lot",
+ "pin-project-lite",
+ "rand",
+ "tokio",
+ "uuid",
+]
+
+[[package]]
+name = "datafusion-sql"
+version = "34.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a60d9d6460a64fddb8663db41da97e6b8b0bf79da42f997ebe81722731eaf0e5"
+dependencies = [
+ "arrow",
+ "arrow-schema",
+ "datafusion-common",
+ "datafusion-expr",
+ "log",
+ "sqlparser",
+]
+
+[[package]]
+name = "debugid"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d"
+dependencies = [
+ "uuid",
+]
+
+[[package]]
+name = "derivative"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "destructure_traitobject"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3c877555693c14d2f84191cfd3ad8582790fc52b5e2274b40b59cf5f5cea25c7"
+
+[[package]]
+name = "digest"
+version = "0.10.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
+dependencies = [
+ "block-buffer",
+ "crypto-common",
+ "subtle",
+]
+
+[[package]]
+name = "doc-comment"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
+
+[[package]]
+name = "either"
+version = "1.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
+
+[[package]]
+name = "equivalent"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
+
+[[package]]
+name = "errno"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245"
+dependencies = [
+ "libc",
+ "windows-sys",
+]
+
+[[package]]
+name = "fastrand"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5"
+
+[[package]]
+name = "findshlibs"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64"
+dependencies = [
+ "cc",
+ "lazy_static",
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "fixedbitset"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
+
+[[package]]
+name = "flatbuffers"
+version = "23.5.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640"
+dependencies = [
+ "bitflags 1.3.2",
+ "rustc_version",
+]
+
+[[package]]
+name = "flate2"
+version = "1.0.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
+[[package]]
+name = "form_urlencoded"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
+dependencies = [
+ "percent-encoding",
+]
+
+[[package]]
+name = "futures"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "futures-executor",
+ "futures-io",
+ "futures-sink",
+ "futures-task",
+ "futures-util",
+]
+
+[[package]]
+name = "futures-channel"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78"
+dependencies = [
+ "futures-core",
+ "futures-sink",
+]
+
+[[package]]
+name = "futures-core"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d"
+
+[[package]]
+name = "futures-executor"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d"
+dependencies = [
+ "futures-core",
+ "futures-task",
+ "futures-util",
+]
+
+[[package]]
+name = "futures-io"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1"
+
+[[package]]
+name = "futures-macro"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.48",
+]
+
+[[package]]
+name = "futures-sink"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5"
+
+[[package]]
+name = "futures-task"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004"
+
+[[package]]
+name = "futures-util"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "futures-io",
+ "futures-macro",
+ "futures-sink",
+ "futures-task",
+ "memchr",
+ "pin-project-lite",
+ "pin-utils",
+ "slab",
+]
+
+[[package]]
+name = "generic-array"
+version = "0.14.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
+dependencies = [
+ "typenum",
+ "version_check",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.2.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi",
+]
+
+[[package]]
+name = "gimli"
+version = "0.28.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
+
+[[package]]
+name = "glob"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
+
+[[package]]
+name = "half"
+version = "1.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
+
+[[package]]
+name = "half"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ad6a9459c9c30b177b925162351f97e7d967c7ea8bab3b8352805327daf45554"
+dependencies = [
+ "crunchy",
+ "num-traits",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+
+[[package]]
+name = "hashbrown"
+version = "0.14.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
+dependencies = [
+ "ahash",
+ "allocator-api2",
+]
+
+[[package]]
+name = "heck"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
+dependencies = [
+ "unicode-segmentation",
+]
+
+[[package]]
+name = "heck"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
+
+[[package]]
+name = "hermit-abi"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
+
+[[package]]
+name = "hex"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
+
+[[package]]
+name = "home"
+version = "0.5.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "humantime"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+
+[[package]]
+name = "iana-time-zone"
+version = "0.1.59"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6a67363e2aa4443928ce15e57ebae94fd8949958fd1223c4cfc0cd473ad7539"
+dependencies = [
+ "android_system_properties",
+ "core-foundation-sys",
+ "iana-time-zone-haiku",
+ "js-sys",
+ "wasm-bindgen",
+ "windows-core",
+]
+
+[[package]]
+name = "iana-time-zone-haiku"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "idna"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6"
+dependencies = [
+ "unicode-bidi",
+ "unicode-normalization",
+]
+
+[[package]]
+name = "indexmap"
+version = "1.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
+dependencies = [
+ "autocfg",
+ "hashbrown 0.12.3",
+]
+
+[[package]]
+name = "indexmap"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f"
+dependencies = [
+ "equivalent",
+ "hashbrown 0.14.3",
+]
+
+[[package]]
+name = "inferno"
+version = "0.11.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "321f0f839cd44a4686e9504b0a62b4d69a50b62072144c71c68f5873c167b8d9"
+dependencies = [
+ "ahash",
+ "indexmap 2.1.0",
+ "is-terminal",
+ "itoa",
+ "log",
+ "num-format",
+ "once_cell",
+ "quick-xml",
+ "rgb",
+ "str_stack",
+]
+
+[[package]]
+name = "integer-encoding"
+version = "1.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48dc51180a9b377fd75814d0cc02199c20f8e99433d6762f650d39cdbbd3b56f"
+
+[[package]]
+name = "integer-encoding"
+version = "3.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02"
+
+[[package]]
+name = "is-terminal"
+version = "0.4.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455"
+dependencies = [
+ "hermit-abi",
+ "rustix",
+ "windows-sys",
+]
+
+[[package]]
+name = "itertools"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itertools"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itertools"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c"
+
+[[package]]
+name = "jni"
+version = "0.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c6df18c2e3db7e453d3c6ac5b3e9d5182664d28788126d39b91f2d1e22b017ec"
+dependencies = [
+ "cesu8",
+ "combine",
+ "jni-sys",
+ "log",
+ "thiserror",
+ "walkdir",
+]
+
+[[package]]
+name = "jni-sys"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
+
+[[package]]
+name = "jobserver"
+version = "0.1.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "js-sys"
+version = "0.3.66"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca"
+dependencies = [
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
+[[package]]
+name = "lexical-core"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46"
+dependencies = [
+ "lexical-parse-float",
+ "lexical-parse-integer",
+ "lexical-util",
+ "lexical-write-float",
+ "lexical-write-integer",
+]
+
+[[package]]
+name = "lexical-parse-float"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f"
+dependencies = [
+ "lexical-parse-integer",
+ "lexical-util",
+ "static_assertions",
+]
+
+[[package]]
+name = "lexical-parse-integer"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9"
+dependencies = [
+ "lexical-util",
+ "static_assertions",
+]
+
+[[package]]
+name = "lexical-util"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc"
+dependencies = [
+ "static_assertions",
+]
+
+[[package]]
+name = "lexical-write-float"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862"
+dependencies = [
+ "lexical-util",
+ "lexical-write-integer",
+ "static_assertions",
+]
+
+[[package]]
+name = "lexical-write-integer"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446"
+dependencies = [
+ "lexical-util",
+ "static_assertions",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.151"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4"
+
+[[package]]
+name = "libm"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
+
+[[package]]
+name = "libmimalloc-sys"
+version = "0.1.35"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3979b5c37ece694f1f5e51e7ecc871fdb0f517ed04ee45f88d15d6d553cb9664"
+dependencies = [
+ "cc",
+ "libc",
+]
+
+[[package]]
+name = "linked-hash-map"
+version = "0.5.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
+
+[[package]]
+name = "linux-raw-sys"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456"
+
+[[package]]
+name = "lock_api"
+version = "0.4.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45"
+dependencies = [
+ "autocfg",
+ "scopeguard",
+]
+
+[[package]]
+name = "log"
+version = "0.4.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "log-mdc"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a94d21414c1f4a51209ad204c1776a3d0765002c76c6abcb602a6f09f1e881c7"
+
+[[package]]
+name = "log4rs"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d36ca1786d9e79b8193a68d480a0907b612f109537115c6ff655a3a1967533fd"
+dependencies = [
+ "anyhow",
+ "arc-swap",
+ "chrono",
+ "derivative",
+ "fnv",
+ "humantime",
+ "libc",
+ "log",
+ "log-mdc",
+ "parking_lot",
+ "serde",
+ "serde-value",
+ "serde_json",
+ "serde_yaml",
+ "thiserror",
+ "thread-id",
+ "typemap-ors",
+ "winapi",
+]
+
+[[package]]
+name = "lz4"
+version = "1.24.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1"
+dependencies = [
+ "libc",
+ "lz4-sys",
+]
+
+[[package]]
+name = "lz4-sys"
+version = "1.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900"
+dependencies = [
+ "cc",
+ "libc",
+]
+
+[[package]]
+name = "md-5"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
+dependencies = [
+ "cfg-if",
+ "digest",
+]
+
+[[package]]
+name = "memchr"
+version = "2.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
+
+[[package]]
+name = "memmap2"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "45fd3a57831bf88bc63f8cebc0cf956116276e97fef3966103e96416209f7c92"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "mimalloc"
+version = "0.1.39"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa01922b5ea280a911e323e4d2fd24b7fe5cc4042e0d2cda3c40775cdc4bdc9c"
+dependencies = [
+ "libmimalloc-sys",
+]
+
+[[package]]
+name = "miniz_oxide"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
+dependencies = [
+ "adler",
+]
+
+[[package]]
+name = "multimap"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
+
+[[package]]
+name = "nix"
+version = "0.26.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
+dependencies = [
+ "bitflags 1.3.2",
+ "cfg-if",
+ "libc",
+]
+
+[[package]]
+name = "num"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af"
+dependencies = [
+ "num-bigint",
+ "num-complex",
+ "num-integer",
+ "num-iter",
+ "num-rational",
+ "num-traits",
+]
+
+[[package]]
+name = "num-bigint"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-complex"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-format"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3"
+dependencies = [
+ "arrayvec",
+ "itoa",
+]
+
+[[package]]
+name = "num-integer"
+version = "0.1.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
+dependencies = [
+ "autocfg",
+ "num-traits",
+]
+
+[[package]]
+name = "num-iter"
+version = "0.1.43"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-rational"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0"
+dependencies = [
+ "autocfg",
+ "num-bigint",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
+dependencies = [
+ "autocfg",
+ "libm",
+]
+
+[[package]]
+name = "num_cpus"
+version = "1.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
+dependencies = [
+ "hermit-abi",
+ "libc",
+]
+
+[[package]]
+name = "object"
+version = "0.32.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "object_store"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2524735495ea1268be33d200e1ee97455096a0846295a21548cd2f3541de7050"
+dependencies = [
+ "async-trait",
+ "bytes",
+ "chrono",
+ "futures",
+ "humantime",
+ "itertools 0.11.0",
+ "parking_lot",
+ "percent-encoding",
+ "snafu",
+ "tokio",
+ "tracing",
+ "url",
+ "walkdir",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
+
+[[package]]
+name = "oorandom"
+version = "11.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
+
+[[package]]
+name = "ordered-float"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "ordered-float"
+version = "2.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "parking_lot"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
+dependencies = [
+ "lock_api",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-targets 0.48.5",
+]
+
+[[package]]
+name = "parquet"
+version = "49.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af88740a842787da39b3d69ce5fbf6fce97d20211d3b299fee0a0da6430c74d4"
+dependencies = [
+ "ahash",
+ "bytes",
+ "chrono",
+ "hashbrown 0.14.3",
+ "num",
+ "num-bigint",
+ "paste",
+ "seq-macro",
+ "thrift 0.17.0",
+ "twox-hash",
+]
+
+[[package]]
+name = "parquet-format"
+version = "4.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f0c06cdcd5460967c485f9c40a821746f5955ad81990533c7fae95dbd9bc0b5"
+dependencies = [
+ "thrift 0.13.0",
+]
+
+[[package]]
+name = "parse-zoneinfo"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41"
+dependencies = [
+ "regex",
+]
+
+[[package]]
+name = "paste"
+version = "1.0.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
+
+[[package]]
+name = "percent-encoding"
+version = "2.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
+
+[[package]]
+name = "petgraph"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9"
+dependencies = [
+ "fixedbitset",
+ "indexmap 2.1.0",
+]
+
+[[package]]
+name = "phf"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
+dependencies = [
+ "phf_shared",
+]
+
+[[package]]
+name = "phf_codegen"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a"
+dependencies = [
+ "phf_generator",
+ "phf_shared",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0"
+dependencies = [
+ "phf_shared",
+ "rand",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b"
+dependencies = [
+ "siphasher",
+]
+
+[[package]]
+name = "pin-project-lite"
+version = "0.2.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58"
+
+[[package]]
+name = "pin-utils"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
+
+[[package]]
+name = "pkg-config"
+version = "0.3.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a"
+
+[[package]]
+name = "plotters"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45"
+dependencies = [
+ "num-traits",
+ "plotters-backend",
+ "plotters-svg",
+ "wasm-bindgen",
+ "web-sys",
+]
+
+[[package]]
+name = "plotters-backend"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609"
+
+[[package]]
+name = "plotters-svg"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab"
+dependencies = [
+ "plotters-backend",
+]
+
+[[package]]
+name = "pprof"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef5c97c51bd34c7e742402e216abdeb44d415fbe6ae41d56b114723e953711cb"
+dependencies = [
+ "backtrace",
+ "cfg-if",
+ "findshlibs",
+ "inferno",
+ "libc",
+ "log",
+ "nix",
+ "once_cell",
+ "parking_lot",
+ "smallvec",
+ "symbolic-demangle",
+ "tempfile",
+ "thiserror",
+]
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.75"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "907a61bd0f64c2f29cd1cf1dc34d05176426a3f504a78010f08416ddb7b13708"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "prost"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "444879275cb4fd84958b1a1d5420d15e6fcf7c235fe47f053c9c2a80aceb6001"
+dependencies = [
+ "bytes",
+ "prost-derive 0.9.0",
+]
+
+[[package]]
+name = "prost"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "146c289cda302b98a28d40c8b3b90498d6e526dd24ac2ecea73e4e491685b94a"
+dependencies = [
+ "bytes",
+ "prost-derive 0.12.3",
+]
+
+[[package]]
+name = "prost-build"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62941722fb675d463659e49c4f3fe1fe792ff24fe5bbaa9c08cd3b98a1c354f5"
+dependencies = [
+ "bytes",
+ "heck 0.3.3",
+ "itertools 0.10.5",
+ "lazy_static",
+ "log",
+ "multimap",
+ "petgraph",
+ "prost 0.9.0",
+ "prost-types",
+ "regex",
+ "tempfile",
+ "which",
+]
+
+[[package]]
+name = "prost-derive"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f9cc1a3263e07e0bf68e96268f37665207b49560d98739662cdfaae215c720fe"
+dependencies = [
+ "anyhow",
+ "itertools 0.10.5",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "prost-derive"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e"
+dependencies = [
+ "anyhow",
+ "itertools 0.11.0",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.48",
+]
+
+[[package]]
+name = "prost-types"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "534b7a0e836e3c482d2693070f982e39e7611da9695d4d1f5a4b186b51faef0a"
+dependencies = [
+ "bytes",
+ "prost 0.9.0",
+]
+
+[[package]]
+name = "quick-xml"
+version = "0.26.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.35"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rand"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
+dependencies = [
+ "libc",
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
+name = "rayon"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
+dependencies = [
+ "bitflags 1.3.2",
+]
+
+[[package]]
+name = "regex"
+version = "1.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
+
+[[package]]
+name = "rgb"
+version = "0.8.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05aaa8004b64fd573fc9d002f4e632d51ad4f026c2b5ba95fcb6c2f32c2c47d8"
+dependencies = [
+ "bytemuck",
+]
+
+[[package]]
+name = "rustc-demangle"
+version = "0.1.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
+
+[[package]]
+name = "rustc_version"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
+dependencies = [
+ "semver",
+]
+
+[[package]]
+name = "rustix"
+version = "0.38.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316"
+dependencies = [
+ "bitflags 2.4.1",
+ "errno",
+ "libc",
+ "linux-raw-sys",
+ "windows-sys",
+]
+
+[[package]]
+name = "rustversion"
+version = "1.0.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
+
+[[package]]
+name = "ryu"
+version = "1.0.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c"
+
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
+name = "semver"
+version = "1.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0"
+
+[[package]]
+name = "seq-macro"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
+
+[[package]]
+name = "serde"
+version = "1.0.194"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b114498256798c94a0689e1a15fec6005dee8ac1f41de56404b67afc2a4b773"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde-value"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c"
+dependencies = [
+ "ordered-float 2.10.1",
+ "serde",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.194"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a3385e45322e8f9931410f01b3031ec534c3947d0e94c18049af4d9f9907d4e0"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.48",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.111"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4"
+dependencies = [
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "serde_yaml"
+version = "0.8.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "578a7433b776b56a35785ed5ce9a7e777ac0598aac5a6dd1b4b18a307c7fc71b"
+dependencies = [
+ "indexmap 1.9.3",
+ "ryu",
+ "serde",
+ "yaml-rust",
+]
+
+[[package]]
+name = "sha2"
+version = "0.10.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "digest",
+]
+
+[[package]]
+name = "simd-adler32"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
+
+[[package]]
+name = "siphasher"
+version = "0.3.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
+
+[[package]]
+name = "slab"
+version = "0.4.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "smallvec"
+version = "1.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970"
+
+[[package]]
+name = "snafu"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6"
+dependencies = [
+ "doc-comment",
+ "snafu-derive",
+]
+
+[[package]]
+name = "snafu-derive"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf"
+dependencies = [
+ "heck 0.4.1",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "snap"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b"
+
+[[package]]
+name = "sqlparser"
+version = "0.40.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c80afe31cdb649e56c0d9bb5503be9166600d68a852c38dd445636d126858e5"
+dependencies = [
+ "log",
+ "sqlparser_derive",
+]
+
+[[package]]
+name = "sqlparser_derive"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.48",
+]
+
+[[package]]
+name = "stable_deref_trait"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
+
+[[package]]
+name = "static_assertions"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+
+[[package]]
+name = "str_stack"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb"
+
+[[package]]
+name = "strum"
+version = "0.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125"
+dependencies = [
+ "strum_macros",
+]
+
+[[package]]
+name = "strum_macros"
+version = "0.25.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0"
+dependencies = [
+ "heck 0.4.1",
+ "proc-macro2",
+ "quote",
+ "rustversion",
+ "syn 2.0.48",
+]
+
+[[package]]
+name = "subtle"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
+
+[[package]]
+name = "symbolic-common"
+version = "12.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1cccfffbc6bb3bb2d3a26cd2077f4d055f6808d266f9d4d158797a4c60510dfe"
+dependencies = [
+ "debugid",
+ "memmap2",
+ "stable_deref_trait",
+ "uuid",
+]
+
+[[package]]
+name = "symbolic-demangle"
+version = "12.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76a99812da4020a67e76c4eb41f08c87364c14170495ff780f30dd519c221a68"
+dependencies = [
+ "cpp_demangle",
+ "rustc-demangle",
+ "symbolic-common",
+]
+
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.48"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "tempfile"
+version = "3.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa"
+dependencies = [
+ "cfg-if",
+ "fastrand",
+ "redox_syscall",
+ "rustix",
+ "windows-sys",
+]
+
+[[package]]
+name = "thiserror"
+version = "1.0.56"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad"
+dependencies = [
+ "thiserror-impl",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "1.0.56"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.48",
+]
+
+[[package]]
+name = "thread-id"
+version = "4.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0ec81c46e9eb50deaa257be2f148adf052d1fb7701cfd55ccfab2525280b70b"
+dependencies = [
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "threadpool"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa"
+dependencies = [
+ "num_cpus",
+]
+
+[[package]]
+name = "thrift"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c6d965454947cc7266d22716ebfd07b18d84ebaf35eec558586bbb2a8cb6b5b"
+dependencies = [
+ "byteorder",
+ "integer-encoding 1.1.7",
+ "log",
+ "ordered-float 1.1.1",
+ "threadpool",
+]
+
+[[package]]
+name = "thrift"
+version = "0.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09"
+dependencies = [
+ "byteorder",
+ "integer-encoding 3.0.4",
+ "log",
+ "ordered-float 2.10.1",
+ "threadpool",
+]
+
+[[package]]
+name = "tiny-keccak"
+version = "2.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
+dependencies = [
+ "crunchy",
+]
+
+[[package]]
+name = "tinytemplate"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
+dependencies = [
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "tinyvec"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50"
+dependencies = [
+ "tinyvec_macros",
+]
+
+[[package]]
+name = "tinyvec_macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
+
+[[package]]
+name = "tokio"
+version = "1.35.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c89b4efa943be685f629b149f53829423f8f5531ea21249408e8e2f8671ec104"
+dependencies = [
+ "backtrace",
+ "bytes",
+ "num_cpus",
+ "parking_lot",
+ "pin-project-lite",
+ "tokio-macros",
+]
+
+[[package]]
+name = "tokio-macros"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.48",
+]
+
+[[package]]
+name = "tokio-stream"
+version = "0.1.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842"
+dependencies = [
+ "futures-core",
+ "pin-project-lite",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-util"
+version = "0.7.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "futures-sink",
+ "pin-project-lite",
+ "tokio",
+]
+
+[[package]]
+name = "tracing"
+version = "0.1.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef"
+dependencies = [
+ "pin-project-lite",
+ "tracing-attributes",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-attributes"
+version = "0.1.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.48",
+]
+
+[[package]]
+name = "tracing-core"
+version = "0.1.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
+dependencies = [
+ "once_cell",
+]
+
+[[package]]
+name = "twox-hash"
+version = "1.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"
+dependencies = [
+ "cfg-if",
+ "static_assertions",
+]
+
+[[package]]
+name = "typemap-ors"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a68c24b707f02dd18f1e4ccceb9d49f2058c2fb86384ef9972592904d7a28867"
+dependencies = [
+ "unsafe-any-ors",
+]
+
+[[package]]
+name = "typenum"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
+
+[[package]]
+name = "unicode-bidi"
+version = "0.3.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f2528f27a9eb2b21e69c95319b30bd0efd85d09c379741b0f78ea1d86be2416"
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
+
+[[package]]
+name = "unicode-normalization"
+version = "0.1.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921"
+dependencies = [
+ "tinyvec",
+]
+
+[[package]]
+name = "unicode-segmentation"
+version = "1.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36"
+
+[[package]]
+name = "unicode-width"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85"
+
+[[package]]
+name = "unsafe-any-ors"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e0a303d30665362d9680d7d91d78b23f5f899504d4f08b3c4cf08d055d87c0ad"
+dependencies = [
+ "destructure_traitobject",
+]
+
+[[package]]
+name = "url"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633"
+dependencies = [
+ "form_urlencoded",
+ "idna",
+ "percent-encoding",
+]
+
+[[package]]
+name = "uuid"
+version = "1.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
+name = "version_check"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
+
+[[package]]
+name = "walkdir"
+version = "2.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
+[[package]]
+name = "wasi"
+version = "0.11.0+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.89"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e"
+dependencies = [
+ "cfg-if",
+ "wasm-bindgen-macro",
+]
+
+[[package]]
+name = "wasm-bindgen-backend"
+version = "0.2.89"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826"
+dependencies = [
+ "bumpalo",
+ "log",
+ "once_cell",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.48",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.89"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.89"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.48",
+ "wasm-bindgen-backend",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.89"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f"
+
+[[package]]
+name = "web-sys"
+version = "0.3.66"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50c24a44ec86bb68fbecd1b3efed7e85ea5621b39b35ef2766b66cd984f8010f"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "which"
+version = "4.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
+dependencies = [
+ "either",
+ "home",
+ "once_cell",
+ "rustix",
+]
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-util"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "windows-core"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
+dependencies = [
+ "windows-targets 0.52.0",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
+dependencies = [
+ "windows-targets 0.52.0",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
+dependencies = [
+ "windows_aarch64_gnullvm 0.48.5",
+ "windows_aarch64_msvc 0.48.5",
+ "windows_i686_gnu 0.48.5",
+ "windows_i686_msvc 0.48.5",
+ "windows_x86_64_gnu 0.48.5",
+ "windows_x86_64_gnullvm 0.48.5",
+ "windows_x86_64_msvc 0.48.5",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
+dependencies = [
+ "windows_aarch64_gnullvm 0.52.0",
+ "windows_aarch64_msvc 0.52.0",
+ "windows_i686_gnu 0.52.0",
+ "windows_i686_msvc 0.52.0",
+ "windows_x86_64_gnu 0.52.0",
+ "windows_x86_64_gnullvm 0.52.0",
+ "windows_x86_64_msvc 0.52.0",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
+
+[[package]]
+name = "yaml-rust"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
+dependencies = [
+ "linked-hash-map",
+]
+
+[[package]]
+name = "zerocopy"
+version = "0.7.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.7.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.48",
+]
+
+[[package]]
+name = "zstd"
+version = "0.11.2+zstd.1.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4"
+dependencies = [
+ "zstd-safe",
+]
+
+[[package]]
+name = "zstd-safe"
+version = "5.0.2+zstd.1.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db"
+dependencies = [
+ "libc",
+ "zstd-sys",
+]
+
+[[package]]
+name = "zstd-sys"
+version = "2.0.9+zstd.1.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656"
+dependencies = [
+ "cc",
+ "pkg-config",
+]
diff --git a/core/Cargo.toml b/core/Cargo.toml
new file mode 100644
index 000000000..adc3732e3
--- /dev/null
+++ b/core/Cargo.toml
@@ -0,0 +1,115 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "comet"
+version = "0.1.0"
+edition = "2021"
+include = [
+ "benches/*.rs",
+ "src/**/*.rs",
+ "Cargo.toml",
+]
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+parquet-format = "4.0.0" # This must be kept in sync with that from parquet crate
+arrow = { version = "~49.0.0", features = ["prettyprint", "ffi", "chrono-tz"] }
+arrow-array = { version = "~49.0.0" }
+arrow-data = { version = "~49.0.0" }
+arrow-schema = { version = "~49.0.0" }
+arrow-string = { version = "~49.0.0" }
+parquet = { version = "~49.0.0", default-features = false, features = ["experimental"] }
+half = { version = "~2.1", default-features = false }
+futures = "0.3.28"
+mimalloc = { version = "*", default-features = false, optional = true }
+tokio = { version = "1", features = ["rt-multi-thread"] }
+tokio-stream = { version = "0.1.8" }
+async-trait = "0.1"
+log = "0.4"
+log4rs = "1.2.0"
+thiserror = "1"
+serde = { version = "1", features = ["derive"] }
+lazy_static = "1.4.0"
+prost = "0.12.1"
+thrift = "0.17"
+jni = "0.19"
+byteorder = "1.4.3"
+snap = "1.1"
+brotli = "3.3"
+flate2 = "1.0"
+lz4 = "1.24"
+zstd = "0.11"
+rand = "0.8"
+num = "0.4"
+bytes = "1.5.0"
+hashbrown = { version = "0.14", features = ["raw"] }
+parking_lot = "0.12"
+tempfile = "3.8.0"
+ahash = { version = "0.8", default-features = false }
+itertools = "0.11.0"
+chrono = { version = "0.4", default-features = false, features = ["clock"] }
+chrono-tz = { version = "0.8" }
+paste = "1.0.14"
+datafusion-common = { version = "34.0.0" }
+datafusion = { default-features = false, version = "34.0.0", features = ["unicode_expressions"] }
+datafusion-physical-expr = { version = "34.0.0", default-features = false, features = ["unicode_expressions"] }
+unicode-segmentation = "^1.10.1"
+once_cell = "1.18.0"
+regex = "1.9.6"
+crc32fast = "1.3.2"
+simd-adler32 = "0.3.7"
+
+[build-dependencies]
+prost-build = "0.9.0"
+
+[dev-dependencies]
+pprof = { version = "0.13.0", features = ["flamegraph"] }
+criterion = "0.5.1"
+jni = { version = "0.19", features = ["invocation"] }
+lazy_static = "1.4"
+assertables = "7"
+
+[features]
+default = []
+nightly = []
+
+[profile.release]
+debug = true
+overflow-checks = false
+lto = "thin"
+codegen-units = 1
+strip = "debuginfo"
+
+[lib]
+name = "comet"
+# "rlib" is for benchmarking with criterion.
+crate_type = ["cdylib", "rlib"]
+
+[[bench]]
+name = "parquet_read"
+harness = false
+
+[[bench]]
+name = "bit_util"
+harness = false
+
+[[bench]]
+name = "hash"
+harness = false
+
diff --git a/core/benches/bit_util.rs b/core/benches/bit_util.rs
new file mode 100644
index 000000000..e92dd6375
--- /dev/null
+++ b/core/benches/bit_util.rs
@@ -0,0 +1,182 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::{mem::size_of, time::Duration};
+
+use rand::{thread_rng, Rng};
+
+use arrow::buffer::Buffer;
+use comet::common::bit::{
+ log2, read_num_bytes_u32, read_num_bytes_u64, set_bits, BitReader, BitWriter,
+};
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+
+/// Benchmark to measure bit_util performance.
+/// To run this benchmark:
+/// `cd core && cargo bench --bench bit_util`
+/// Results will be written to "core/target/criterion/bit_util/"
+fn criterion_benchmark(c: &mut Criterion) {
+ let mut group = c.benchmark_group("bit_util");
+
+ const N: usize = 1024 * 1024;
+ let mut writer: BitWriter = BitWriter::new(N * 10);
+ for _ in 0..N {
+ if !writer.put_vlq_int(thread_rng().gen::<u64>()) {
+ break;
+ }
+ }
+ let buffer = writer.consume();
+ let buffer = Buffer::from(&buffer);
+
+ // log2
+ for bits in (0..64).step_by(3) {
+ let x = 1u64 << bits;
+ group.bench_with_input(BenchmarkId::new("log2", bits), &x, |b, &x| {
+ b.iter(|| log2(black_box(x)));
+ });
+ }
+
+ // set_bits
+ for offset in (0..16).step_by(3) {
+ for length in (0..16).step_by(3) {
+ let x = (offset, length);
+ group.bench_with_input(
+ BenchmarkId::new("set_bits", format!("offset_{}_length_{}", x.0, x.1)),
+ &x,
+ |b, &x| {
+ b.iter(|| set_bits(&mut [0u8; 4], black_box(x.0), black_box(x.1)));
+ },
+ );
+ }
+ }
+
+ // get_vlq_int
+ group.bench_function("get_vlq_int", |b| {
+ b.iter(|| {
+ let mut reader: BitReader = BitReader::new_all(buffer.slice(0));
+ bench_get_vlq_int(&mut reader)
+ })
+ });
+
+ // get_bits
+ for offset in (0..32).step_by(17) {
+ for num_bits in (1..5).step_by(1) {
+ let x = (offset, num_bits);
+ group.bench_with_input(
+ BenchmarkId::new("get_bits", format!("offset_{}_num_bits_{}", x.0, x.1)),
+ &x,
+ |b, &x| {
+ let mut reader: BitReader = BitReader::new_all(buffer.slice(0));
+ b.iter(|| reader.get_bits(&mut [0u8; 4], black_box(x.0), black_box(x.1)));
+ },
+ );
+ }
+ }
+
+ // get_aligned
+ for num_bytes in (1..=size_of::<u8>()).step_by(3) {
+ let x = num_bytes;
+ group.bench_with_input(
+ BenchmarkId::new("get_aligned", format!("u8_num_bytes_{}", x)),
+ &x,
+ |b, &x| {
+ let mut reader: BitReader = BitReader::new_all(buffer.slice(0));
+ b.iter(|| reader.get_aligned::<u8>(black_box(x)));
+ },
+ );
+ }
+ for num_bytes in (1..=size_of::<u32>()).step_by(3) {
+ let x = num_bytes;
+ group.bench_with_input(
+ BenchmarkId::new("get_aligned", format!("u32_num_bytes_{}", x)),
+ &x,
+ |b, &x| {
+ let mut reader: BitReader = BitReader::new_all(buffer.slice(0));
+ b.iter(|| reader.get_aligned::<u32>(black_box(x)));
+ },
+ );
+ }
+ for num_bytes in (1..=size_of::<i32>()).step_by(3) {
+ let x = num_bytes;
+ group.bench_with_input(
+ BenchmarkId::new("get_aligned", format!("i32_num_bytes_{}", x)),
+ &x,
+ |b, &x| {
+ let mut reader: BitReader = BitReader::new_all(buffer.slice(0));
+ b.iter(|| reader.get_aligned::<i32>(black_box(x)));
+ },
+ );
+ }
+
+ // get_value
+ for num_bytes in (1..=size_of::<i32>()).step_by(3) {
+ let x = num_bytes * 8;
+ group.bench_with_input(
+ BenchmarkId::new("get_value", format!("i32_num_bits_{}", x)),
+ &x,
+ |b, &x| {
+ let mut reader: BitReader = BitReader::new_all(buffer.slice(0));
+ b.iter(|| reader.get_value::<i32>(black_box(x)));
+ },
+ );
+ }
+
+ // read_num_bytes_u64
+ for num_bytes in (1..=8).step_by(7) {
+ let x = num_bytes;
+ group.bench_with_input(
+ BenchmarkId::new("read_num_bytes_u64", format!("num_bytes_{}", x)),
+ &x,
+ |b, &x| {
+ b.iter(|| read_num_bytes_u64(black_box(x), black_box(buffer.as_slice())));
+ },
+ );
+ }
+
+ // read_num_bytes_u32
+ for num_bytes in (1..=4).step_by(3) {
+ let x = num_bytes;
+ group.bench_with_input(
+ BenchmarkId::new("read_num_bytes_u32", format!("num_bytes_{}", x)),
+ &x,
+ |b, &x| {
+ b.iter(|| read_num_bytes_u32(black_box(x), black_box(buffer.as_slice())));
+ },
+ );
+ }
+
+ group.finish();
+}
+
+fn bench_get_vlq_int(reader: &mut BitReader) {
+ while let Some(v) = reader.get_vlq_int() {
+ black_box(v);
+ }
+}
+
+fn config() -> Criterion {
+ Criterion::default()
+ .measurement_time(Duration::from_millis(500))
+ .warm_up_time(Duration::from_millis(500))
+}
+
+criterion_group! {
+ name = benches;
+ config = config();
+ targets = criterion_benchmark
+}
+criterion_main!(benches);
diff --git a/core/benches/common.rs b/core/benches/common.rs
new file mode 100644
index 000000000..059721698
--- /dev/null
+++ b/core/benches/common.rs
@@ -0,0 +1,83 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::{
+ array::{DictionaryArray, Int64Array, PrimitiveArray},
+ datatypes::{ArrowPrimitiveType, Int32Type},
+};
+use arrow_schema::ArrowError;
+use rand::{
+ distributions::{Distribution, Standard},
+ rngs::StdRng,
+ Rng, SeedableRng,
+};
+use std::sync::Arc;
+
+/// Returns fixed seedable RNG
+pub fn seedable_rng() -> StdRng {
+ StdRng::seed_from_u64(42)
+}
+
+pub fn create_int64_array(size: usize, null_density: f32, min: i64, max: i64) -> Int64Array {
+ let mut rng = seedable_rng();
+ (0..size)
+ .map(|_| {
+ if rng.gen::<f32>() < null_density {
+ None
+ } else {
+ Some(rng.gen_range(min..max))
+ }
+ })
+ .collect()
+}
+
+pub fn create_primitive_array<T>(size: usize, null_density: f32) -> PrimitiveArray<T>
+where
+ T: ArrowPrimitiveType,
+ Standard: Distribution<T::Native>,
+{
+ let mut rng = seedable_rng();
+ (0..size)
+ .map(|_| {
+ if rng.gen::<f32>() < null_density {
+ None
+ } else {
+ Some(rng.gen())
+ }
+ })
+ .collect()
+}
+
+/// Creates a dictionary with random keys and values, with value type `T`.
+/// Note here the keys are the dictionary indices.
+pub fn create_dictionary_array<T>(
+ size: usize,
+ value_size: usize,
+ null_density: f32,
+) -> Result<DictionaryArray<Int32Type>, ArrowError>
+where
+ T: ArrowPrimitiveType,
+ Standard: Distribution<T::Native>,
+{
+ // values are not null
+ let values = create_primitive_array::<T>(value_size, 0.0);
+ let keys = create_primitive_array::<Int32Type>(size, null_density)
+ .iter()
+ .map(|v| v.map(|w| w.abs() % (value_size as i32)))
+ .collect();
+ DictionaryArray::try_new(keys, Arc::new(values))
+}
diff --git a/core/benches/hash.rs b/core/benches/hash.rs
new file mode 100644
index 000000000..dafad79dd
--- /dev/null
+++ b/core/benches/hash.rs
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#[path = "common.rs"]
+mod common;
+
+use arrow_array::ArrayRef;
+use comet::execution::kernels::hash;
+use common::*;
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use std::sync::Arc;
+
+const BATCH_SIZE: usize = 1024 * 8;
+const NUM_ITER: usize = 10;
+const NULL_FRACTION: f32 = 0.1;
+
+fn criterion_benchmark(c: &mut Criterion) {
+ let mut group = c.benchmark_group("hash");
+
+ let a1: ArrayRef = Arc::new(create_int64_array(BATCH_SIZE, 0.0, 0, BATCH_SIZE as i64));
+ let a2: ArrayRef = Arc::new(create_int64_array(BATCH_SIZE, 0.0, 0, BATCH_SIZE as i64));
+ let a3: ArrayRef = Arc::new(create_int64_array(
+ BATCH_SIZE,
+ NULL_FRACTION,
+ 0,
+ BATCH_SIZE as i64,
+ ));
+ let a4: ArrayRef = Arc::new(create_int64_array(
+ BATCH_SIZE,
+ NULL_FRACTION,
+ 0,
+ BATCH_SIZE as i64,
+ ));
+
+ group.bench_function(
+ BenchmarkId::new("hash_i64_single_nonnull", BATCH_SIZE),
+ |b| {
+ let input = vec![a1.clone()];
+ let mut dst = vec![0; BATCH_SIZE];
+
+ b.iter(|| {
+ for _ in 0..NUM_ITER {
+ hash(&input, &mut dst);
+ }
+ });
+ },
+ );
+ group.bench_function(BenchmarkId::new("hash_i64_single_null", BATCH_SIZE), |b| {
+ let input = vec![a3.clone()];
+ let mut dst = vec![0; BATCH_SIZE];
+
+ b.iter(|| {
+ for _ in 0..NUM_ITER {
+ hash(&input, &mut dst);
+ }
+ });
+ });
+ group.bench_function(
+ BenchmarkId::new("hash_i64_multiple_nonnull", BATCH_SIZE),
+ |b| {
+ let input = vec![a1.clone(), a2.clone()];
+ let mut dst = vec![0; BATCH_SIZE];
+
+ b.iter(|| {
+ for _ in 0..NUM_ITER {
+ hash(&input, &mut dst);
+ }
+ });
+ },
+ );
+ group.bench_function(
+ BenchmarkId::new("hash_i64_multiple_null", BATCH_SIZE),
+ |b| {
+ let input = vec![a3.clone(), a4.clone()];
+ let mut dst = vec![0; BATCH_SIZE];
+
+ b.iter(|| {
+ for _ in 0..NUM_ITER {
+ hash(&input, &mut dst);
+ }
+ });
+ },
+ );
+}
+
+fn config() -> Criterion {
+ Criterion::default()
+}
+
+criterion_group! {
+ name = benches;
+ config = config();
+ targets = criterion_benchmark
+}
+criterion_main!(benches);
diff --git a/core/benches/parquet_read.rs b/core/benches/parquet_read.rs
new file mode 100644
index 000000000..7dcfab7a4
--- /dev/null
+++ b/core/benches/parquet_read.rs
@@ -0,0 +1,217 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+mod perf;
+
+use std::sync::Arc;
+
+use arrow::{array::ArrayData, buffer::Buffer};
+use comet::parquet::{read::ColumnReader, util::jni::TypePromotionInfo};
+use criterion::{criterion_group, criterion_main, Criterion};
+use parquet::{
+ basic::{Encoding, Type as PhysicalType},
+ column::page::{PageIterator, PageReader},
+ data_type::Int32Type,
+ schema::types::{
+ ColumnDescPtr, ColumnDescriptor, ColumnPath, PrimitiveTypeBuilder, SchemaDescPtr, TypePtr,
+ },
+};
+
+use comet::parquet::util::test_common::page_util::{
+ DataPageBuilder, DataPageBuilderImpl, InMemoryPageIterator,
+};
+
+use perf::FlamegraphProfiler;
+use rand::{prelude::StdRng, Rng, SeedableRng};
+
+fn bench(c: &mut Criterion) {
+ let expected_num_values: usize = NUM_PAGES * VALUES_PER_PAGE;
+ let mut group = c.benchmark_group("comet_parquet_read");
+ let schema = build_test_schema();
+
+ let pages = build_plain_int32_pages(schema.clone(), schema.column(0), 0.0);
+ group.bench_function("INT/PLAIN/NOT_NULL", |b| {
+ let t = TypePtr::new(
+ PrimitiveTypeBuilder::new("f", PhysicalType::INT32)
+ .with_length(4)
+ .build()
+ .unwrap(),
+ );
+ b.iter(|| {
+ let cd = ColumnDescriptor::new(t.clone(), 0, 0, ColumnPath::from(Vec::new()));
+ let promotion_info = TypePromotionInfo::new(PhysicalType::INT32, -1);
+ let mut column_reader = TestColumnReader::new(
+ cd,
+ promition_info,
+ BATCH_SIZE,
+ pages.clone(),
+ expected_num_values,
+ );
+
+ let mut total = 0;
+ for batch in column_reader.by_ref() {
+ total += batch.len();
+ ::std::mem::forget(batch);
+ }
+ assert_eq!(total, expected_num_values);
+ });
+ });
+}
+
+fn profiled() -> Criterion {
+ Criterion::default().with_profiler(FlamegraphProfiler::new(100))
+}
+
+criterion_group! {
+ name = benches;
+ config = profiled();
+ targets = bench
+}
+criterion_main!(benches);
+
+fn build_test_schema() -> SchemaDescPtr {
+ use parquet::schema::{parser::parse_message_type, types::SchemaDescriptor};
+ let message_type = "
+ message test_schema {
+ REQUIRED INT32 c1;
+ OPTIONAL INT32 c2;
+ }
+ ";
+ parse_message_type(message_type)
+ .map(|t| Arc::new(SchemaDescriptor::new(Arc::new(t))))
+ .unwrap()
+}
+
+fn seedable_rng() -> StdRng {
+ StdRng::seed_from_u64(42)
+}
+
+// test data params
+const NUM_PAGES: usize = 1000;
+const VALUES_PER_PAGE: usize = 10_000;
+const BATCH_SIZE: usize = 4096;
+
+fn build_plain_int32_pages(
+ schema: SchemaDescPtr,
+ column_desc: ColumnDescPtr,
+ null_density: f32,
+) -> impl PageIterator + Clone {
+ let max_def_level = column_desc.max_def_level();
+ let max_rep_level = column_desc.max_rep_level();
+ let rep_levels = vec![0; VALUES_PER_PAGE];
+ let mut rng = seedable_rng();
+ let mut pages: Vec<parquet::column::page::Page> = Vec::new();
+ let mut int32_value = 0;
+ for _ in 0..NUM_PAGES {
+ // generate page
+ let mut values = Vec::with_capacity(VALUES_PER_PAGE);
+ let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE);
+ for _ in 0..VALUES_PER_PAGE {
+ let def_level = if rng.gen::<f32>() < null_density {
+ max_def_level - 1
+ } else {
+ max_def_level
+ };
+ if def_level == max_def_level {
+ int32_value += 1;
+ values.push(int32_value);
+ }
+ def_levels.push(def_level);
+ }
+ let mut page_builder =
+ DataPageBuilderImpl::new(column_desc.clone(), values.len() as u32, true);
+ page_builder.add_rep_levels(max_rep_level, &rep_levels);
+ page_builder.add_def_levels(max_def_level, &def_levels);
+ page_builder.add_values::<Int32Type>(Encoding::PLAIN, &values);
+ pages.push(page_builder.consume());
+ }
+
+ // Since `InMemoryPageReader` is not exposed from parquet crate, here we use
+ // `InMemoryPageIterator` instead, which iterates over `Vec<Page>`s.
+ InMemoryPageIterator::new(schema, column_desc, vec![pages])
+}
+
+struct TestColumnReader {
+ inner: ColumnReader,
+ pages: Box<dyn PageReader>,
+ batch_size: usize,
+ total_num_values: usize,
+ total_num_values_read: usize,
+ first_page_loaded: bool,
+}
+
+impl TestColumnReader {
+ pub fn new(
+ cd: ColumnDescriptor,
+ promotion_info: TypePromotionInfo,
+ batch_size: usize,
+ mut page_iter: impl PageIterator + 'static,
+ total_num_values: usize,
+ ) -> Self {
+ let reader = ColumnReader::get(cd, promotion_info, batch_size, false, false);
+ let first = page_iter.next().unwrap().unwrap();
+ Self {
+ inner: reader,
+ pages: first,
+ batch_size,
+ total_num_values,
+ total_num_values_read: 0,
+ first_page_loaded: false,
+ }
+ }
+
+ fn load_page(&mut self) {
+ if let Some(page) = self.pages.get_next_page().unwrap() {
+ let num_values = page.num_values() as usize;
+ let buffer = Buffer::from_slice_ref(page.buffer().data());
+ self.inner.set_page_v1(num_values, buffer, page.encoding());
+ }
+ }
+}
+
+impl Iterator for TestColumnReader {
+ type Item = ArrayData;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.total_num_values_read >= self.total_num_values {
+ return None;
+ }
+
+ if !self.first_page_loaded {
+ self.load_page();
+ self.first_page_loaded = true;
+ }
+
+ self.inner.reset_batch();
+ let total = ::std::cmp::min(
+ self.batch_size,
+ self.total_num_values - self.total_num_values_read,
+ );
+
+ let mut left = total;
+ while left > 0 {
+ let (num_read, _) = self.inner.read_batch(left, 0);
+ if num_read < left {
+ self.load_page();
+ }
+ left -= num_read;
+ }
+ self.total_num_values_read += total;
+
+ Some(self.inner.current_batch())
+ }
+}
diff --git a/core/benches/perf.rs b/core/benches/perf.rs
new file mode 100644
index 000000000..f92ec0250
--- /dev/null
+++ b/core/benches/perf.rs
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::{fs::File, os::raw::c_int, path::Path};
+
+use criterion::profiler::Profiler;
+use pprof::ProfilerGuard;
+
+/// A custom profiler for criterion which generates flamegraph.
+///
+/// Mostly followed this blog post: https://www.jibbow.com/posts/criterion-flamegraphs/
+/// After running `cargo bench --bench <bench name> -- --profile-time=<seconds>`,
+/// you can find flamegraph.svg under `target/criterion/<bench name>/<bench name>/profile/`.
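+/// In this crate it is hooked into Criterion via
+/// `Criterion::default().with_profiler(FlamegraphProfiler::new(100))` (see `benches/parquet_read.rs`).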
+pub struct FlamegraphProfiler<'a> {
+ frequency: c_int,
+ active_profiler: Option<ProfilerGuard<'a>>,
+}
+
+impl<'a> FlamegraphProfiler<'a> {
+ pub fn new(frequency: c_int) -> Self {
+ FlamegraphProfiler {
+ frequency,
+ active_profiler: None,
+ }
+ }
+}
+
+impl<'a> Profiler for FlamegraphProfiler<'a> {
+ fn start_profiling(&mut self, _benchmark_id: &str, _benchmark_dir: &Path) {
+ self.active_profiler = Some(ProfilerGuard::new(self.frequency).unwrap());
+ }
+
+ fn stop_profiling(&mut self, _benchmark_id: &str, benchmark_dir: &Path) {
+ std::fs::create_dir_all(benchmark_dir).unwrap();
+ let flamegraph_path = benchmark_dir.join("flamegraph.svg");
+ let flamegraph_file =
+ File::create(flamegraph_path).expect("File system error while creating flamegraph.svg");
+ if let Some(profiler) = self.active_profiler.take() {
+ profiler
+ .report()
+ .build()
+ .unwrap()
+ .flamegraph(flamegraph_file)
+ .expect("Error writing flamegraph");
+ }
+ }
+}
diff --git a/core/build.rs b/core/build.rs
new file mode 100644
index 000000000..4322bbbc9
--- /dev/null
+++ b/core/build.rs
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Build script for generating Rust code from the .proto files.
+
+use std::{fs, io::Result, path::Path};
+
+fn main() -> Result<()> {
+ println!("cargo:rerun-if-changed=src/execution/proto/*.proto");
+
+ let out_dir = "src/execution/generated";
+ if !Path::new(out_dir).is_dir() {
+ fs::create_dir(out_dir)?;
+ }
+
+ prost_build::Config::new().out_dir(out_dir).compile_protos(
+ &[
+ "src/execution/proto/expr.proto",
+ "src/execution/proto/partitioning.proto",
+ "src/execution/proto/operator.proto",
+ ],
+ &["src/execution/proto"],
+ )?;
+ Ok(())
+}
diff --git a/core/rustfmt.toml b/core/rustfmt.toml
new file mode 100644
index 000000000..39a3fe635
--- /dev/null
+++ b/core/rustfmt.toml
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+edition = "2021"
+max_width = 100
+
+# The following requires nightly feature:
+# rustup install nightly
+# rustup component add rustfmt --toolchain nightly
+# cargo +nightly fmt
+wrap_comments = true
+comment_width = 100
+imports_granularity = "Crate" # group imports by crate
diff --git a/core/src/common/bit.rs b/core/src/common/bit.rs
new file mode 100644
index 000000000..4af560ffc
--- /dev/null
+++ b/core/src/common/bit.rs
@@ -0,0 +1,1648 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::{cmp, cmp::min, mem::size_of};
+
+use arrow::buffer::Buffer;
+
+use crate::{
+ errors::CometResult as Result,
+ likely,
+ parquet::{data_type::AsBytes, util::bit_packing::unpack32},
+ unlikely,
+};
+
+#[inline]
+pub fn from_ne_slice<T: FromBytes>(bs: &[u8]) -> T {
+ let mut b = T::Buffer::default();
+ {
+ let b = b.as_mut();
+ let bs = &bs[..b.len()];
+ b.copy_from_slice(bs);
+ }
+ T::from_ne_bytes(b)
+}
+
+pub trait FromBytes: Sized {
+ type Buffer: AsMut<[u8]> + Default;
+ fn from_le_bytes(bs: Self::Buffer) -> Self;
+ fn from_be_bytes(bs: Self::Buffer) -> Self;
+ fn from_ne_bytes(bs: Self::Buffer) -> Self;
+ fn from(v: u64) -> Self;
+}
+
+macro_rules! from_le_bytes {
+ ($($ty: ty),*) => {
+ $(
+ impl FromBytes for $ty {
+ type Buffer = [u8; size_of::<$ty>()];
+ fn from_le_bytes(bs: Self::Buffer) -> Self {
+ <$ty>::from_le_bytes(bs)
+ }
+ fn from_be_bytes(bs: Self::Buffer) -> Self {
+ <$ty>::from_be_bytes(bs)
+ }
+ fn from_ne_bytes(bs: Self::Buffer) -> Self {
+ <$ty>::from_ne_bytes(bs)
+ }
+ fn from(v: u64) -> Self {
+ v as $ty
+ }
+ }
+ )*
+ };
+}
+
+impl FromBytes for bool {
+ type Buffer = [u8; 1];
+ fn from_le_bytes(bs: Self::Buffer) -> Self {
+ Self::from_ne_bytes(bs)
+ }
+ fn from_be_bytes(bs: Self::Buffer) -> Self {
+ Self::from_ne_bytes(bs)
+ }
+ fn from_ne_bytes(bs: Self::Buffer) -> Self {
+ match bs[0] {
+ 0 => false,
+ 1 => true,
+ _ => panic!("Invalid byte when reading bool"),
+ }
+ }
+ fn from(v: u64) -> Self {
+ (v & 1) == 1
+ }
+}
+
+// TODO: support f32 and f64 in the future, but there is no use case right now
+// f32/f64::from(v: u64) will be like `from_ne_slice(v.as_bytes())` and that is
+// expensive as it involves copying buffers
+from_le_bytes! { u8, u16, u32, u64, i8, i16, i32, i64 }
+
+/// Reads `$size` of bytes from `$src`, and reinterprets them as type `$ty`, in
+/// little-endian order. `$ty` must implement the `Default` trait. Otherwise this won't
+/// compile.
+/// This is copied and modified from byteorder crate.
+macro_rules! read_num_bytes {
+ ($ty:ty, $size:expr, $src:expr) => {{
+ debug_assert!($size <= $src.len());
+ let mut buffer = <$ty as $crate::common::bit::FromBytes>::Buffer::default();
+ buffer.as_mut()[..$size].copy_from_slice(&$src[..$size]);
+ <$ty>::from_ne_bytes(buffer)
+ }};
+}
+
+/// u64 specific version of read_num_bytes!
+/// This is faster than read_num_bytes! because this method avoids buffer copies.
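+/// For example, on a little-endian machine, `read_num_bytes_u64(2, &[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08])`
+/// returns 0x0201.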
+#[inline]
+pub fn read_num_bytes_u64(size: usize, src: &[u8]) -> u64 {
+ debug_assert!(size <= src.len());
+ if unlikely(src.len() < 8) {
+ return read_num_bytes!(u64, size, src);
+ }
+ let in_ptr = src as *const [u8] as *const u8 as *const u64;
+ let v = unsafe { in_ptr.read_unaligned() };
+ trailing_bits(v, size * 8)
+}
+
+/// u32 specific version of read_num_bytes!
+/// This is faster than read_num_bytes! because this method avoids buffer copies.
+#[inline]
+pub fn read_num_bytes_u32(size: usize, src: &[u8]) -> u32 {
+ debug_assert!(size <= src.len());
+ if unlikely(src.len() < 4) {
+ return read_num_bytes!(u32, size, src);
+ }
+ let in_ptr = src as *const [u8] as *const u8 as *const u32;
+ let v = unsafe { in_ptr.read_unaligned() };
+ trailing_bits(v as u64, size * 8) as u32
+}
+
+/// Converts value `val` of type `T` to a byte vector, by reading `num_bytes` from `val`.
+/// NOTE: if `val` is less than the size of `T` then it can be truncated.
+#[inline]
+pub fn convert_to_bytes<T>(val: &T, num_bytes: usize) -> Vec<u8>
+where
+ T: ?Sized + AsBytes,
+{
+ let mut bytes: Vec<u8> = vec![0; num_bytes];
+ memcpy_value(val.as_bytes(), num_bytes, &mut bytes);
+ bytes
+}
+
+#[inline]
+pub fn memcpy(source: &[u8], target: &mut [u8]) {
+ debug_assert!(target.len() >= source.len(), "Copying from source to target is not possible. Source has {} bytes but target has {} bytes", source.len(), target.len());
+ target[..source.len()].copy_from_slice(source)
+}
+
+#[inline]
+pub fn memcpy_value<T>(source: &T, num_bytes: usize, target: &mut [u8])
+where
+ T: ?Sized + AsBytes,
+{
+ debug_assert!(
+ target.len() >= num_bytes,
+ "Not enough space. Only had {} bytes but need to put {} bytes",
+ target.len(),
+ num_bytes
+ );
+ memcpy(&source.as_bytes()[..num_bytes], target)
+}
+
+/// Returns the ceil of value/divisor
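+/// For example, ceil(10, 3) returns 4 and ceil(9, 3) returns 3.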
+#[inline]
+pub fn ceil(value: usize, divisor: usize) -> usize {
+ value / divisor + ((value % divisor != 0) as usize)
+}
+
+/// Returns ceil(log2(x))
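+/// For example, log2(1) returns 0, log2(8) returns 3, and log2(9) returns 4.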
+#[inline]
+pub fn log2(mut x: u64) -> u32 {
+ if x == 1 {
+ return 0;
+ }
+ x -= 1;
+ 64u32 - x.leading_zeros()
+}
+
+/// Returns the `num_bits` least-significant bits of `v`
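+/// For example, trailing_bits(0b1101, 2) returns 0b01.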
+#[inline]
+pub fn trailing_bits(v: u64, num_bits: usize) -> u64 {
+ if unlikely(num_bits == 0) {
+ return 0;
+ }
+ if unlikely(num_bits >= 64) {
+ return v;
+ }
+ let n = 64 - num_bits;
+ (v << n) >> n
+}
+
+pub fn set_bit_value(bits: &mut [u8], i: usize, val: bool) {
+ bits[i / 8] |= (val as u8) << (i % 8);
+}
+
+#[inline]
+pub fn set_bit(bits: &mut [u8], i: usize) {
+ bits[i / 8] |= 1 << (i % 8);
+}
+
+/// Set the bit value at index `i`, for buffer `bits`.
+///
+/// # Safety
+/// This doesn't check bounds, the caller must ensure that `i` is in (0, bits.len() * 8)
+#[inline]
+pub unsafe fn set_bit_raw(bits: *mut u8, i: usize) {
+ *bits.add(i / 8) |= 1 << (i % 8);
+}
+
+#[inline]
+pub fn unset_bit(bits: &mut [u8], i: usize) {
+ bits[i / 8] &= !(1 << (i % 8));
+}
+
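+/// Sets all bits in the range `[offset, offset + length)` of the byte buffer `bits` to 1.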
+#[inline]
+pub fn set_bits(bits: &mut [u8], offset: usize, length: usize) {
+ let mut byte_i = offset / 8;
+ let offset_r = offset % 8;
+ let end = offset + length;
+ let end_byte_i = end / 8;
+ let end_r = end % 8;
+
+ // if the offset starts in the middle of a byte, update the byte first
+ if offset_r != 0 {
+ let num_bits = min(length, 7);
+ bits[byte_i] |= ((1u8 << num_bits) - 1) << offset_r;
+ byte_i += 1;
+ }
+
+ // See if there is an opportunity to do a bulk byte write
+ if byte_i < end_byte_i {
+ unsafe {
+ bits.as_mut_ptr()
+ .add(byte_i)
+ .write_bytes(255, end_byte_i - byte_i);
+ }
+ byte_i = end_byte_i;
+ }
+
+ // take care of the last byte
+ if end_r > 0 && (byte_i == end_byte_i) {
+ bits[byte_i] |= (1u8 << end_r) - 1;
+ }
+}
+
+/// Returns the minimum number of bits needed to represent the value 'x'
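+/// For example, num_required_bits(0) returns 0, num_required_bits(1) returns 1, and num_required_bits(5) returns 3.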
+#[inline]
+pub fn num_required_bits(x: u64) -> usize {
+ for i in (0..64).rev() {
+ if x & (1u64 << i) != 0 {
+ return i + 1;
+ }
+ }
+ 0
+}
+
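+/// Combines `lower` and `upper` into one hash value: ((17 * 37 + lower) * 37 + upper), using wrapping arithmetic.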
+#[inline(always)]
+pub fn mix_hash(lower: u64, upper: u64) -> u64 {
+ let hash = (17 * 37u64).wrapping_add(lower);
+ hash.wrapping_mul(37).wrapping_add(upper)
+}
+
+static BIT_MASK: [u8; 8] = [1, 2, 4, 8, 16, 32, 64, 128];
+
+/// Returns whether bit at position `i` in `data` is set or not
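+/// For example, get_bit(&[0b0000_0101], 2) returns true, since bits are indexed LSB-first within each byte.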
+#[inline]
+pub fn get_bit(data: &[u8], i: usize) -> bool {
+ (data[i >> 3] & BIT_MASK[i & 7]) != 0
+}
+
+/// Returns the boolean value at index `i`.
+///
+/// # Safety
+/// This doesn't check bounds, the caller must ensure that index < self.len()
+#[inline]
+pub unsafe fn get_bit_raw(ptr: *const u8, i: usize) -> bool {
+ (*ptr.add(i >> 3) & BIT_MASK[i & 7]) != 0
+}
+
+/// Utility class for writing bit/byte streams. This class can write data in either
+/// bit packed or byte aligned fashion.
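+/// For example, `BitWriter::new(8)` followed by `put_value(3, 2)` and `consume()` yields the single byte 0x03.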
+pub struct BitWriter {
+ buffer: Vec<u8>,
+ max_bytes: usize,
+ buffered_values: u64,
+ byte_offset: usize,
+ bit_offset: usize,
+ start: usize,
+}
+
+impl BitWriter {
+ pub fn new(max_bytes: usize) -> Self {
+ Self {
+ buffer: vec![0; max_bytes],
+ max_bytes,
+ buffered_values: 0,
+ byte_offset: 0,
+ bit_offset: 0,
+ start: 0,
+ }
+ }
+
+ /// Initializes the writer from the existing buffer `buffer` and starting
+ /// offset `start`.
+ pub fn new_from_buf(buffer: Vec<u8>, start: usize) -> Self {
+ debug_assert!(start < buffer.len());
+ let len = buffer.len();
+ Self {
+ buffer,
+ max_bytes: len,
+ buffered_values: 0,
+ byte_offset: start,
+ bit_offset: 0,
+ start,
+ }
+ }
+
+ /// Extend buffer size by `increment` bytes
+ #[inline]
+ pub fn extend(&mut self, increment: usize) {
+ self.max_bytes += increment;
+ let extra = vec![0; increment];
+ self.buffer.extend(extra);
+ }
+
+ /// Report buffer size, in bytes
+ #[inline]
+ pub fn capacity(&mut self) -> usize {
+ self.max_bytes
+ }
+
+ /// Consumes and returns the current buffer.
+ #[inline]
+ pub fn consume(mut self) -> Vec<u8> {
+ self.flush();
+ self.buffer.truncate(self.byte_offset);
+ self.buffer
+ }
+
+ /// Flushes the internal buffered bits and returns the buffer's content.
+ /// This is a borrow equivalent of `consume` method.
+ #[inline]
+ pub fn flush_buffer(&mut self) -> &[u8] {
+ self.flush();
+ &self.buffer()[0..self.byte_offset]
+ }
+
+ /// Clears the internal state so the buffer can be reused.
+ #[inline]
+ pub fn clear(&mut self) {
+ self.buffered_values = 0;
+ self.byte_offset = self.start;
+ self.bit_offset = 0;
+ }
+
+ /// Flushes the internal buffered bits and aligns the buffer to the next byte.
+ #[inline]
+ pub fn flush(&mut self) {
+ let num_bytes = ceil(self.bit_offset, 8);
+ debug_assert!(self.byte_offset + num_bytes <= self.max_bytes);
+ memcpy_value(
+ &self.buffered_values,
+ num_bytes,
+ &mut self.buffer[self.byte_offset..],
+ );
+ self.buffered_values = 0;
+ self.bit_offset = 0;
+ self.byte_offset += num_bytes;
+ }
+
+ /// Advances the current offset by skipping `num_bytes`, flushing the internal bit
+ /// buffer first.
+ /// This is useful when you want to jump over `num_bytes` bytes and come back later
+ /// to fill these bytes.
+ ///
+ /// Returns error if `num_bytes` is beyond the boundary of the internal buffer.
+ /// Otherwise, returns the old offset.
+ #[inline]
+ pub fn skip(&mut self, num_bytes: usize) -> Result<usize> {
+ self.flush();
+ debug_assert!(self.byte_offset <= self.max_bytes);
+ if unlikely(self.byte_offset + num_bytes > self.max_bytes) {
+ return Err(general_err!(
+ "Not enough bytes left in BitWriter. Need {} but only have {}",
+ self.byte_offset + num_bytes,
+ self.max_bytes
+ ));
+ }
+ let result = self.byte_offset;
+ self.byte_offset += num_bytes;
+ Ok(result)
+ }
+
+ /// Returns a slice containing the next `num_bytes` bytes starting from the current
+ /// offset, and advances the underlying buffer by `num_bytes`.
+ /// This is useful when you want to jump over `num_bytes` bytes and come back later
+ /// to fill these bytes.
+ #[inline]
+ pub fn get_next_byte_ptr(&mut self, num_bytes: usize) -> Result<&mut [u8]> {
+ let offset = self.skip(num_bytes)?;
+ Ok(&mut self.buffer[offset..offset + num_bytes])
+ }
+
+ #[inline]
+ pub fn bytes_written(&self) -> usize {
+ self.byte_offset - self.start + ceil(self.bit_offset, 8)
+ }
+
+ #[inline]
+ pub fn buffer(&self) -> &[u8] {
+ &self.buffer[self.start..]
+ }
+
+ #[inline]
+ pub fn byte_offset(&self) -> usize {
+ self.byte_offset
+ }
+
+ /// Returns the internal buffer length. This is the maximum number of bytes that this
+ /// writer can write. User needs to call `consume` to consume the current buffer
+ /// before more data can be written.
+ #[inline]
+ pub fn buffer_len(&self) -> usize {
+ self.max_bytes
+ }
+
+ /// Writes the entire byte `value` at the byte `offset`
+ pub fn write_at(&mut self, offset: usize, value: u8) {
+ self.buffer[offset] = value;
+ }
+
+ /// Writes the `num_bits` LSB of value `v` to the internal buffer of this writer.
+ /// The `num_bits` must not be greater than 64. This is bit packed.
+ ///
+ /// Returns false if there's not enough room left. True otherwise.
+ #[inline]
+ #[allow(clippy::unnecessary_cast)]
+ pub fn put_value(&mut self, v: u64, num_bits: usize) -> bool {
+ debug_assert!(num_bits <= 64);
+ debug_assert_eq!(v.checked_shr(num_bits as u32).unwrap_or(0), 0); // covers case v >> 64
+
+ let num_bytes = self.byte_offset * 8 + self.bit_offset + num_bits;
+ if unlikely(num_bytes > self.max_bytes as usize * 8) {
+ return false;
+ }
+
+ self.buffered_values |= v << self.bit_offset;
+ self.bit_offset += num_bits;
+ if self.bit_offset >= 64 {
+ memcpy_value(
+ &self.buffered_values,
+ 8,
+ &mut self.buffer[self.byte_offset..],
+ );
+ self.byte_offset += 8;
+ self.bit_offset -= 64;
+ self.buffered_values = 0;
+ // Perform checked right shift: v >> offset, where offset < 64, otherwise we
+ // shift all bits
+ self.buffered_values = v
+ .checked_shr((num_bits - self.bit_offset) as u32)
+ .unwrap_or(0);
+ }
+ debug_assert!(self.bit_offset < 64);
+ true
+ }
+
+ /// Writes `val` of `num_bytes` bytes to the next aligned byte. If size of `T` is
+ /// larger than `num_bytes`, extra higher ordered bytes will be ignored.
+ ///
+ /// Returns false if there's not enough room left. True otherwise.
+ #[inline]
+ pub fn put_aligned<T: AsBytes>(&mut self, val: T, num_bytes: usize) -> bool {
+ let result = self.get_next_byte_ptr(num_bytes);
+ if unlikely(result.is_err()) {
+ // TODO: should we return `Result` for this func?
+ return false;
+ }
+ let ptr = result.unwrap();
+ memcpy_value(&val, num_bytes, ptr);
+ true
+ }
+
+ /// Writes `val` of `num_bytes` bytes at the designated `offset`. The `offset` is the
+ /// offset starting from the beginning of the internal buffer that this writer
+ /// maintains. Note that this will overwrite any existing data between `offset` and
+ /// `offset + num_bytes`. Also that if size of `T` is larger than `num_bytes`, extra
+ /// higher ordered bytes will be ignored.
+ ///
+ /// Returns false if there's not enough room left, or the `pos` is not valid.
+ /// True otherwise.
+ #[inline]
+ pub fn put_aligned_offset<T: AsBytes>(
+ &mut self,
+ val: T,
+ num_bytes: usize,
+ offset: usize,
+ ) -> bool {
+ if unlikely(num_bytes + offset > self.max_bytes) {
+ return false;
+ }
+ memcpy_value(
+ &val,
+ num_bytes,
+ &mut self.buffer[offset..offset + num_bytes],
+ );
+ true
+ }
+
+ /// Writes a VLQ encoded integer `v` to this buffer. The value is byte aligned.
+ ///
+ /// Returns false if there's not enough room left. True otherwise.
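+ /// For example, the value 300 is written as the two bytes 0xAC and 0x02.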
+ #[inline]
+ pub fn put_vlq_int(&mut self, mut v: u64) -> bool {
+ let mut result = true;
+ while v & 0xFFFFFFFFFFFFFF80 != 0 {
+ result &= self.put_aligned::<u8>(((v & 0x7F) | 0x80) as u8, 1);
+ v >>= 7;
+ }
+ result &= self.put_aligned::<u8>((v & 0x7F) as u8, 1);
+ result
+ }
+
+ /// Writes a zigzag-VLQ encoded (in little endian order) int `v` to this buffer.
+ /// Zigzag-VLQ is a variant of VLQ encoding where negative and positive
+ /// numbers are encoded in a zigzag fashion.
+ /// See: https://developers.google.com/protocol-buffers/docs/encoding
+ ///
+ /// Returns false if there's not enough room left. True otherwise.
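+ /// For example, 0 is encoded as 0, -1 as 1, 1 as 2, and -2 as 3, before VLQ encoding is applied.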
+ #[inline]
+ pub fn put_zigzag_vlq_int(&mut self, v: i64) -> bool {
+ let u: u64 = ((v << 1) ^ (v >> 63)) as u64;
+ self.put_vlq_int(u)
+ }
+}
+
+/// Maximum byte length for a VLQ encoded integer
+/// MAX_VLQ_BYTE_LEN = 5 for i32, and MAX_VLQ_BYTE_LEN = 10 for i64
+pub const MAX_VLQ_BYTE_LEN: usize = 10;
+
+pub struct BitReader {
+ /// The byte buffer to read from, passed in by client
+ buffer: Buffer, // TODO: generalize this
+
+ /// Bytes are memcpy'd from `buffer` and values are read from this variable.
+ /// This is faster than reading values byte by byte directly from `buffer`
+ buffered_values: u64,
+
+ ///
+ /// End Start
+ /// |............|B|B|B|B|B|B|B|B|..............|
+ /// ^ ^
+ /// bit_offset byte_offset
+ ///
+ /// Current byte offset in `buffer`
+ byte_offset: usize,
+
+ /// Current bit offset in `buffered_values`
+ bit_offset: usize,
+
+ /// Total number of bytes in `buffer`
+ total_bytes: usize,
+}
+
+/// Utility class to read bit/byte stream. This class can read bits or bytes that are
+/// either byte aligned or not.
+impl BitReader {
+ pub fn new(buf: Buffer, len: usize) -> Self {
+ let num_bytes = cmp::min(8, len);
+ let buffered_values = read_num_bytes_u64(num_bytes, buf.as_slice());
+ BitReader {
+ buffer: buf,
+ buffered_values,
+ byte_offset: 0,
+ bit_offset: 0,
+ total_bytes: len,
+ }
+ }
+
+ pub fn new_all(buf: Buffer) -> Self {
+ let len = buf.len();
+ Self::new(buf, len)
+ }
+
+ pub fn reset(&mut self, buf: Buffer) {
+ self.buffer = buf;
+ self.total_bytes = self.buffer.len();
+ let num_bytes = cmp::min(8, self.total_bytes);
+ self.buffered_values = read_num_bytes_u64(num_bytes, self.buffer.as_slice());
+ self.byte_offset = 0;
+ self.bit_offset = 0;
+ }
+
+ /// Gets the current byte offset
+ #[inline]
+ pub fn get_byte_offset(&self) -> usize {
+ self.byte_offset + ceil(self.bit_offset, 8)
+ }
+
+ /// Reads a value of type `T` and of size `num_bits`.
+ ///
+ /// Returns `None` if there's not enough data available. `Some` otherwise.
+ pub fn get_value<T: FromBytes>(&mut self, num_bits: usize) -> Option<T> {
+ debug_assert!(num_bits <= 64);
+ debug_assert!(num_bits <= size_of::<T>() * 8);
+
+ if unlikely(self.byte_offset * 8 + self.bit_offset + num_bits > self.total_bytes * 8) {
+ return None;
+ }
+
+ let mut v =
+ trailing_bits(self.buffered_values, self.bit_offset + num_bits) >> self.bit_offset;
+ self.bit_offset += num_bits;
+
+ if self.bit_offset >= 64 {
+ self.byte_offset += 8;
+ self.bit_offset -= 64;
+
+ self.reload_buffer_values();
+ v |= trailing_bits(self.buffered_values, self.bit_offset)
+ .wrapping_shl((num_bits - self.bit_offset) as u32);
+ }
+
+ Some(T::from(v))
+ }
+
+ /// Reads a `u32` value encoded using `num_bits` of bits.
+ ///
+ /// # Safety
+ ///
+ /// This method assumes the following:
+ ///
+ /// - the `num_bits` is <= 64
+ /// - the remaining number of bits to read in this reader is >= `num_bits`.
+ ///
+ /// Undefined behavior will happen if any of the above assumptions is violated.
+ #[inline]
+ pub fn get_u32_value(&mut self, num_bits: usize) -> u32 {
+ let mut v =
+ trailing_bits(self.buffered_values, self.bit_offset + num_bits) >> self.bit_offset;
+ self.bit_offset += num_bits;
+
+ if self.bit_offset >= 64 {
+ self.byte_offset += 8;
+ self.bit_offset -= 64;
+
+ self.reload_buffer_values();
+ v |= trailing_bits(self.buffered_values, self.bit_offset)
+ .wrapping_shl((num_bits - self.bit_offset) as u32);
+ }
+
+ v as u32
+ }
+
+ /// Gets at most `num_bits` bits from this reader, and appends them to the `dst` byte slice, starting
+ /// at bit offset `offset`.
+ ///
+ /// Returns the actual number of bits appended. In case either the `dst` slice doesn't have
+ /// enough space or the current reader doesn't have enough bits to consume, the returned value
+ /// will be less than the input `num_bits`.
+ ///
+ /// # Preconditions
+ /// * `offset` MUST be < `dst.len() * 8`
+ pub fn get_bits(&mut self, dst: &mut [u8], offset: usize, num_bits: usize) -> usize {
+ debug_assert!(offset < dst.len() * 8);
+
+ let remaining_bits = (self.total_bytes - self.byte_offset) * 8 - self.bit_offset;
+ let num_bits_to_read = min(remaining_bits, min(num_bits, dst.len() * 8 - offset));
+ let mut i = 0;
+
+ // First consume all the remaining bits from the `buffered_values` if there're any.
+ if likely(self.bit_offset != 0) {
+ i += self.get_bits_buffered(dst, offset, num_bits_to_read);
+ }
+
+ debug_assert!(self.bit_offset == 0 || i == num_bits_to_read);
+
+ // Check if there's opportunity to directly copy bytes using `memcpy`.
+ if (offset + i) % 8 == 0 && i < num_bits_to_read {
+ let num_bytes = (num_bits_to_read - i) / 8;
+ let dst_byte_offset = (offset + i) / 8;
+ if num_bytes > 0 {
+ memcpy(
+ &self.buffer[self.byte_offset..self.byte_offset + num_bytes],
+ &mut dst[dst_byte_offset..],
+ );
+ i += num_bytes * 8;
+ self.byte_offset += num_bytes;
+ self.reload_buffer_values();
+ }
+ }
+
+ debug_assert!((offset + i) % 8 != 0 || num_bits_to_read - i < 8);
+
+ // Now copy the remaining bits if there's any.
+ while i < num_bits_to_read {
+ i += self.get_bits_buffered(dst, offset + i, num_bits_to_read - i);
+ }
+
+ num_bits_to_read
+ }
+
+ /// Consume at most `n` bits from `buffered_values`. Returns the actual number of bits consumed.
+ ///
+ /// # Postcondition
+ /// - either bits from `buffered_values` are completely drained (i.e., `bit_offset` == 0)
+ /// - OR the `num_bits` is < the number of remaining bits in `buffered_values` and thus the
+ /// returned value is < `num_bits`.
+ ///
+ /// Either way, the returned value is in range [0, 64].
+ #[inline]
+ fn get_bits_buffered(&mut self, dst: &mut [u8], offset: usize, num_bits: usize) -> usize {
+ if unlikely(num_bits == 0) {
+ return 0;
+ }
+
+ let n = min(num_bits, 64 - self.bit_offset);
+ let offset_i = offset / 8;
+ let offset_r = offset % 8;
+
+ // Extract the value to read out of the buffer
+ let mut v = trailing_bits(self.buffered_values >> self.bit_offset, n);
+
+ // Read the first byte always because n > 0
+ dst[offset_i] |= (v << offset_r) as u8;
+ v >>= 8 - offset_r;
+
+ // Read the rest of the bytes
+ ((offset_i + 1)..(offset_i + ceil(n + offset_r, 8))).for_each(|i| {
+ dst[i] |= v as u8;
+ v >>= 8;
+ });
+
+ self.bit_offset += n;
+ if self.bit_offset == 64 {
+ self.byte_offset += 8;
+ self.bit_offset -= 64;
+ self.reload_buffer_values();
+ }
+
+ n
+ }
+
+ /// Skips at most `num_bits` bits from this reader.
+ ///
+ /// Returns the actual number of bits skipped.
+ pub fn skip_bits(&mut self, num_bits: usize) -> usize {
+ let remaining_bits = (self.total_bytes - self.byte_offset) * 8 - self.bit_offset;
+ let num_bits_to_read = min(remaining_bits, num_bits);
+ let mut i = 0;
+
+ // First skip all the remaining bits by updating the offsets of `buffered_values`.
+ if likely(self.bit_offset != 0) {
+ let n = 64 - self.bit_offset;
+ if num_bits_to_read < n {
+ self.bit_offset += num_bits_to_read;
+ i = num_bits_to_read;
+ } else {
+ self.byte_offset += 8;
+ self.bit_offset = 0;
+ i = n;
+ }
+ }
+
+ // Check if there's opportunity to skip by byte
+ if i + 7 < num_bits_to_read {
+ let num_bytes = (num_bits_to_read - i) / 8;
+ i += num_bytes * 8;
+ self.byte_offset += num_bytes;
+ }
+
+ if self.bit_offset == 0 {
+ self.reload_buffer_values();
+ }
+
+ // Now skip the remaining bits if there's any.
+ if i < num_bits_to_read {
+ self.bit_offset += num_bits_to_read - i;
+ }
+
+ num_bits_to_read
+ }
+
+ /// Reads a batch of `u32` values encoded using `num_bits` of bits, into `dst`.
+ ///
+ /// # Safety
+ ///
+ /// This method assumes the following:
+ ///
+ /// - the `num_bits` is <= 64
+ /// - the remaining number of bits to read in this reader is >= `total * num_bits`.
+ ///
+ /// Undefined behavior will happen if any of the above assumptions is violated.
+ ///
+ /// Unlike `[get_batch]`, this method removes a few checks such as checking the remaining number
+ /// of bits as well as checking the bit width for the element type in `dst`. Therefore, it is
+ /// more efficient.
+ pub unsafe fn get_u32_batch(&mut self, mut dst: *mut u32, total: usize, num_bits: usize) {
+ let mut i = 0;
+
+ // First align bit offset to byte offset
+ if likely(self.bit_offset != 0) {
+ while i < total && self.bit_offset != 0 {
+ *dst = self.get_u32_value(num_bits);
+ dst = dst.offset(1);
+ i += 1;
+ }
+ }
+
+ let in_buf = &self.buffer.as_slice()[self.byte_offset..];
+ let mut in_ptr = in_buf as *const [u8] as *const u8 as *const u32;
+ while total - i >= 32 {
+ in_ptr = unpack32(in_ptr, dst, num_bits);
+ self.byte_offset += 4 * num_bits;
+ dst = dst.offset(32);
+ i += 32;
+ }
+
+ self.reload_buffer_values();
+ while i < total {
+ *dst = self.get_u32_value(num_bits);
+ dst = dst.offset(1);
+ i += 1;
+ }
+ }
+
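+ /// Reads a batch of `num_bits`-wide values into `batch`, returning the number of values
+ /// actually read. This may be less than `batch.len()` when the reader runs out of bits.
+ ///
+ /// Illustrative sketch (not compiled as a doctest; follows the same pattern as
+ /// `test_get_batch_helper` in the tests below):
+ ///
+ /// ```ignore
+ /// let mut writer = BitWriter::new(ceil(8 * 3, 8));
+ /// (0..8u64).for_each(|v| { writer.put_value(v, 3); });
+ /// let mut reader = BitReader::from(writer.consume());
+ /// let mut batch = vec![0u8; 8];
+ /// assert_eq!(reader.get_batch::<u8>(&mut batch, 3), 8);
+ /// assert_eq!(batch, (0..8u8).collect::<Vec<_>>());
+ /// ```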
+ pub fn get_batch<T: FromBytes>(&mut self, batch: &mut [T], num_bits: usize) -> usize {
+ debug_assert!(num_bits <= 32);
+ debug_assert!(num_bits <= size_of::<T>() * 8);
+
+ let mut values_to_read = batch.len();
+ let needed_bits = num_bits * values_to_read;
+ let remaining_bits = (self.total_bytes - self.byte_offset) * 8 - self.bit_offset;
+ if remaining_bits < needed_bits {
+ values_to_read = remaining_bits / num_bits;
+ }
+
+ let mut i = 0;
+
+ // First align bit offset to byte offset
+ if likely(self.bit_offset != 0) {
+ while i < values_to_read && self.bit_offset != 0 {
+ batch[i] = self
+ .get_value(num_bits)
+ .expect("expected to have more data");
+ i += 1;
+ }
+ }
+
+ unsafe {
+ let in_buf = &self.buffer.as_slice()[self.byte_offset..];
+ let mut in_ptr = in_buf as *const [u8] as *const u8 as *const u32;
+ // FIXME assert!(memory::is_ptr_aligned(in_ptr));
+ if size_of::<T>() == 4 {
+ while values_to_read - i >= 32 {
+ let out_ptr = &mut batch[i..] as *mut [T] as *mut T as *mut u32;
+ in_ptr = unpack32(in_ptr, out_ptr, num_bits);
+ self.byte_offset += 4 * num_bits;
+ i += 32;
+ }
+ } else {
+ let mut out_buf = [0u32; 32];
+ let out_ptr = &mut out_buf as &mut [u32] as *mut [u32] as *mut u32;
+ while values_to_read - i >= 32 {
+ in_ptr = unpack32(in_ptr, out_ptr, num_bits);
+ self.byte_offset += 4 * num_bits;
+ for n in 0..32 {
+ // We need to copy from smaller size to bigger size to avoid
+ // overwriting other memory regions.
+ if size_of::<T>() > size_of::<u32>() {
+ std::ptr::copy_nonoverlapping(
+ out_buf[n..].as_ptr(),
+ &mut batch[i] as *mut T as *mut u32,
+ 1,
+ );
+ } else {
+ std::ptr::copy_nonoverlapping(
+ out_buf[n..].as_ptr() as *const T,
+ &mut batch[i] as *mut T,
+ 1,
+ );
+ }
+ i += 1;
+ }
+ }
+ }
+ }
+
+ debug_assert!(values_to_read - i < 32);
+
+ self.reload_buffer_values();
+ while i < values_to_read {
+ batch[i] = self
+ .get_value(num_bits)
+ .expect("expected to have more data");
+ i += 1;
+ }
+
+ values_to_read
+ }
+
+ /// Reads a `num_bytes`-sized value from this buffer and returns it.
+ /// `T` needs to be a little-endian native type. The value is assumed to be byte
+ /// aligned, so the bit reader will be advanced to the start of the next byte before
+ /// reading the value.
+ ///
+ /// Returns `Some` if there are enough bytes left to form a value of `T`,
+ /// `None` otherwise.
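+ ///
+ /// Illustrative sketch (same bytes as `test_bit_reader_get_aligned` below): after reading
+ /// 3 bits, `get_aligned` advances to the next byte boundary before reading the value.
+ ///
+ /// ```ignore
+ /// let mut reader = BitReader::from(vec![0x75u8, 0xCB]);
+ /// assert_eq!(reader.get_value::<i32>(3), Some(5));
+ /// assert_eq!(reader.get_aligned::<u8>(1), Some(0xCB)); // 203
+ /// ```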
+ pub fn get_aligned<T: FromBytes>(&mut self, num_bytes: usize) -> Option<T> {
+ debug_assert!(8 >= size_of::<T>());
+ debug_assert!(num_bytes <= size_of::<T>());
+
+ let bytes_read = ceil(self.bit_offset, 8);
+ if unlikely(self.byte_offset + bytes_read + num_bytes > self.total_bytes) {
+ return None;
+ }
+
+ if bytes_read + num_bytes > 8 {
+ // There may be still unread bytes in buffered_values; however, just reloading seems to
+ // be faster than stitching the buffer with the next buffer based on micro benchmarks
+ // because reloading logic can be simpler
+
+ // Advance byte_offset to next unread byte
+ self.byte_offset += bytes_read;
+ // Reset buffered_values
+ self.reload_buffer_values();
+ self.bit_offset = 0
+ } else {
+ // Advance bit_offset to next unread byte
+ self.bit_offset = bytes_read * 8;
+ }
+
+ let v = T::from(trailing_bits(
+ self.buffered_values >> self.bit_offset,
+ num_bytes * 8,
+ ));
+ self.bit_offset += num_bytes * 8;
+
+ if self.bit_offset == 64 {
+ self.byte_offset += 8;
+ self.bit_offset -= 64;
+ self.reload_buffer_values();
+ }
+
+ Some(v)
+ }
+
+ /// Reads a VLQ encoded (in little endian order) int from the stream.
+ /// The encoded int must start at the beginning of a byte.
+ ///
+ /// Returns `None` if there are not enough bytes left in the stream, `Some` otherwise.
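+ ///
+ /// Illustrative sketch (first two bytes of the sequence used in `test_bit_reader_get_vlq_int`
+ /// below): 0x89 contributes its low 7 bits (9) and sets the continuation bit, 0x01 adds
+ /// 1 << 7, so the decoded value is 137.
+ ///
+ /// ```ignore
+ /// let mut reader = BitReader::from(vec![0x89u8, 0x01]);
+ /// assert_eq!(reader.get_vlq_int(), Some(137));
+ /// ```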
+ pub fn get_vlq_int(&mut self) -> Option<i64> {
+ let mut shift = 0;
+ let mut v: i64 = 0;
+ while let Some(byte) = self.get_aligned::<u8>(1) {
+ v |= ((byte & 0x7F) as i64) << shift;
+ shift += 7;
+ debug_assert!(
+ shift <= MAX_VLQ_BYTE_LEN * 7,
+ "Num of bytes exceed MAX_VLQ_BYTE_LEN ({})",
+ MAX_VLQ_BYTE_LEN
+ );
+ if likely(byte & 0x80 == 0) {
+ return Some(v);
+ }
+ }
+ None
+ }
+
+ /// Reads a zigzag-VLQ encoded (in little endian order) int from the stream
+ /// Zigzag-VLQ is a variant of VLQ encoding where negative and positive numbers are
+ /// encoded in a zigzag fashion.
+ /// See: https://developers.google.com/protocol-buffers/docs/encoding
+ ///
+ /// Note: the encoded int must start at the beginning of a byte.
+ ///
+ /// Returns `None` if there are not enough bytes in the stream, `Some` otherwise.
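+ ///
+ /// Illustrative sketch (mirrors `test_bit_reader_get_zigzag_vlq_int` below): single bytes
+ /// 0, 1, 2, 3 decode to 0, -1, 1, -2.
+ ///
+ /// ```ignore
+ /// let mut reader = BitReader::from(vec![0u8, 1, 2, 3]);
+ /// assert_eq!(reader.get_zigzag_vlq_int(), Some(0));
+ /// assert_eq!(reader.get_zigzag_vlq_int(), Some(-1));
+ /// ```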
+ #[inline]
+ pub fn get_zigzag_vlq_int(&mut self) -> Option<i64> {
+ self.get_vlq_int().map(|v| {
+ let u = v as u64;
+ (u >> 1) as i64 ^ -((u & 1) as i64)
+ })
+ }
+
+ fn reload_buffer_values(&mut self) {
+ let bytes_to_read = cmp::min(self.total_bytes - self.byte_offset, 8);
+ self.buffered_values =
+ read_num_bytes_u64(bytes_to_read, &self.buffer.as_slice()[self.byte_offset..]);
+ }
+}
+
+impl From<Vec<u8>> for BitReader {
+ #[inline]
+ fn from(vec: Vec<u8>) -> Self {
+ let len = vec.len();
+ BitReader::new(Buffer::from(vec), len)
+ }
+}
+
+/// Returns the nearest multiple of `factor` that is `>=` `num`. Here `factor` must
+/// be a power of 2.
+///
+/// Copied from the arrow crate to make arrow optional
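+///
+/// A minimal sketch (illustrative, not a doctest):
+///
+/// ```ignore
+/// assert_eq!(round_upto_power_of_2(5, 8), 8);
+/// assert_eq!(round_upto_power_of_2(16, 8), 16);
+/// ```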
+pub fn round_upto_power_of_2(num: usize, factor: usize) -> usize {
+ debug_assert!(factor > 0 && (factor & (factor - 1)) == 0);
+ (num + (factor - 1)) & !(factor - 1)
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::parquet::util::test_common::*;
+
+ use rand::{
+ distributions::{Distribution, Standard},
+ Rng,
+ };
+ use std::fmt::Debug;
+
+ #[test]
+ fn test_read_num_bytes_u64() {
+ let buffer: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7];
+ for size in 0..buffer.len() {
+ assert_eq!(
+ read_num_bytes_u64(size, &buffer),
+ read_num_bytes!(u64, size, &buffer),
+ );
+ }
+ }
+
+ #[test]
+ fn test_read_num_bytes_u32() {
+ let buffer: Vec<u8> = vec![0, 1, 2, 3];
+ for size in 0..buffer.len() {
+ assert_eq!(
+ read_num_bytes_u32(size, &buffer),
+ read_num_bytes!(u32, size, &buffer),
+ );
+ }
+ }
+
+ #[test]
+ fn test_ceil() {
+ assert_eq!(ceil(0, 1), 0);
+ assert_eq!(ceil(1, 1), 1);
+ assert_eq!(ceil(1, 2), 1);
+ assert_eq!(ceil(1, 8), 1);
+ assert_eq!(ceil(7, 8), 1);
+ assert_eq!(ceil(8, 8), 1);
+ assert_eq!(ceil(9, 8), 2);
+ assert_eq!(ceil(9, 9), 1);
+ assert_eq!(ceil(10000000000, 10), 1000000000);
+ assert_eq!(ceil(10, 10000000000), 1);
+ assert_eq!(ceil(10000000000, 1000000000), 10);
+ }
+
+ #[test]
+ fn test_bit_reader_get_byte_offset() {
+ let buffer = vec![255; 10];
+ let mut bit_reader = BitReader::from(buffer);
+ assert_eq!(bit_reader.get_byte_offset(), 0); // offset (0 bytes, 0 bits)
+ bit_reader.get_value::<i32>(6);
+ assert_eq!(bit_reader.get_byte_offset(), 1); // offset (0 bytes, 6 bits)
+ bit_reader.get_value::<i32>(10);
+ assert_eq!(bit_reader.get_byte_offset(), 2); // offset (0 bytes, 16 bits)
+ bit_reader.get_value::<i32>(20);
+ assert_eq!(bit_reader.get_byte_offset(), 5); // offset (0 bytes, 36 bits)
+ bit_reader.get_value::<i32>(30);
+ assert_eq!(bit_reader.get_byte_offset(), 9); // offset (8 bytes, 2 bits)
+ }
+
+ #[test]
+ fn test_bit_reader_get_value() {
+ let buffer = vec![255, 0];
+ let mut bit_reader = BitReader::from(buffer);
+ assert_eq!(bit_reader.get_value::<i32>(1), Some(1));
+ assert_eq!(bit_reader.get_value::<i32>(2), Some(3));
+ assert_eq!(bit_reader.get_value::<i32>(3), Some(7));
+ assert_eq!(bit_reader.get_value::<i32>(4), Some(3));
+ }
+
+ #[test]
+ fn test_bit_reader_get_value_boundary() {
+ let buffer = vec![10, 0, 0, 0, 20, 0, 30, 0, 0, 0, 40, 0];
+ let mut bit_reader = BitReader::from(buffer);
+ assert_eq!(bit_reader.get_value::<i64>(32), Some(10));
+ assert_eq!(bit_reader.get_value::<i64>(16), Some(20));
+ assert_eq!(bit_reader.get_value::<i64>(32), Some(30));
+ assert_eq!(bit_reader.get_value::<i64>(16), Some(40));
+ }
+
+ #[test]
+ fn test_bit_reader_get_aligned() {
+ // 01110101 11001011
+ let buffer = Buffer::from(vec![0x75, 0xCB]);
+ let mut bit_reader = BitReader::new_all(buffer.clone());
+ assert_eq!(bit_reader.get_value::<i32>(3), Some(5));
+ assert_eq!(bit_reader.get_aligned::<u8>(1), Some(203));
+ assert_eq!(bit_reader.get_value::<i32>(1), None);
+ bit_reader.reset(buffer);
+ assert_eq!(bit_reader.get_aligned::<i32>(3), None);
+ }
+
+ #[test]
+ fn test_bit_reader_get_vlq_int() {
+ // 10001001 00000001 11110010 10110101 00000110
+ let buffer: Vec<u8> = vec![0x89, 0x01, 0xF2, 0xB5, 0x06];
+ let mut bit_reader = BitReader::from(buffer);
+ assert_eq!(bit_reader.get_vlq_int(), Some(137));
+ assert_eq!(bit_reader.get_vlq_int(), Some(105202));
+ }
+
+ #[test]
+ fn test_bit_reader_get_zigzag_vlq_int() {
+ let buffer: Vec<u8> = vec![0, 1, 2, 3];
+ let mut bit_reader = BitReader::from(buffer);
+ assert_eq!(bit_reader.get_zigzag_vlq_int(), Some(0));
+ assert_eq!(bit_reader.get_zigzag_vlq_int(), Some(-1));
+ assert_eq!(bit_reader.get_zigzag_vlq_int(), Some(1));
+ assert_eq!(bit_reader.get_zigzag_vlq_int(), Some(-2));
+ }
+
+ #[test]
+ fn test_set_bit() {
+ let mut buffer = vec![0, 0, 0];
+ set_bit(&mut buffer[..], 1);
+ assert_eq!(buffer, vec![2, 0, 0]);
+ set_bit(&mut buffer[..], 4);
+ assert_eq!(buffer, vec![18, 0, 0]);
+ unset_bit(&mut buffer[..], 1);
+ assert_eq!(buffer, vec![16, 0, 0]);
+ set_bit(&mut buffer[..], 10);
+ assert_eq!(buffer, vec![16, 4, 0]);
+ set_bit(&mut buffer[..], 10);
+ assert_eq!(buffer, vec![16, 4, 0]);
+ set_bit(&mut buffer[..], 11);
+ assert_eq!(buffer, vec![16, 12, 0]);
+ unset_bit(&mut buffer[..], 10);
+ assert_eq!(buffer, vec![16, 8, 0]);
+ }
+
+ #[test]
+ fn test_set_bits() {
+ for offset in 0..=16 {
+ for length in 0..=16 {
+ let mut actual = vec![0, 0, 0, 0];
+ set_bits(&mut actual[..], offset, length);
+ let mut expected = vec![0, 0, 0, 0];
+ for i in 0..length {
+ set_bit(&mut expected, offset + i);
+ }
+ assert_eq!(actual, expected);
+ }
+ }
+ }
+
+ #[test]
+ fn test_num_required_bits() {
+ assert_eq!(num_required_bits(0), 0);
+ assert_eq!(num_required_bits(1), 1);
+ assert_eq!(num_required_bits(2), 2);
+ assert_eq!(num_required_bits(4), 3);
+ assert_eq!(num_required_bits(8), 4);
+ assert_eq!(num_required_bits(10), 4);
+ assert_eq!(num_required_bits(12), 4);
+ assert_eq!(num_required_bits(16), 5);
+ }
+
+ #[test]
+ fn test_get_bit() {
+ // 00001101
+ assert!(get_bit(&[0b00001101], 0));
+ assert!(!get_bit(&[0b00001101], 1));
+ assert!(get_bit(&[0b00001101], 2));
+ assert!(get_bit(&[0b00001101], 3));
+
+ // 01001001 01010010
+ assert!(get_bit(&[0b01001001, 0b01010010], 0));
+ assert!(!get_bit(&[0b01001001, 0b01010010], 1));
+ assert!(!get_bit(&[0b01001001, 0b01010010], 2));
+ assert!(get_bit(&[0b01001001, 0b01010010], 3));
+ assert!(!get_bit(&[0b01001001, 0b01010010], 4));
+ assert!(!get_bit(&[0b01001001, 0b01010010], 5));
+ assert!(get_bit(&[0b01001001, 0b01010010], 6));
+ assert!(!get_bit(&[0b01001001, 0b01010010], 7));
+ assert!(!get_bit(&[0b01001001, 0b01010010], 8));
+ assert!(get_bit(&[0b01001001, 0b01010010], 9));
+ assert!(!get_bit(&[0b01001001, 0b01010010], 10));
+ assert!(!get_bit(&[0b01001001, 0b01010010], 11));
+ assert!(get_bit(&[0b01001001, 0b01010010], 12));
+ assert!(!get_bit(&[0b01001001, 0b01010010], 13));
+ assert!(get_bit(&[0b01001001, 0b01010010], 14));
+ assert!(!get_bit(&[0b01001001, 0b01010010], 15));
+ }
+
+ #[test]
+ fn test_log2() {
+ assert_eq!(log2(1), 0);
+ assert_eq!(log2(2), 1);
+ assert_eq!(log2(3), 2);
+ assert_eq!(log2(4), 2);
+ assert_eq!(log2(5), 3);
+ assert_eq!(log2(5), 3);
+ assert_eq!(log2(6), 3);
+ assert_eq!(log2(7), 3);
+ assert_eq!(log2(8), 3);
+ assert_eq!(log2(9), 4);
+ }
+
+ #[test]
+ fn test_skip() {
+ let mut writer = BitWriter::new(5);
+ let old_offset = writer.skip(1).expect("skip() should return OK");
+ writer.put_aligned(42, 4);
+ writer.put_aligned_offset(0x10, 1, old_offset);
+ let result = writer.consume();
+ assert_eq!(result.as_ref(), [0x10, 42, 0, 0, 0]);
+
+ writer = BitWriter::new(4);
+ let result = writer.skip(5);
+ assert!(result.is_err());
+ }
+
+ #[test]
+ fn test_get_next_byte_ptr() {
+ let mut writer = BitWriter::new(5);
+ {
+ let first_byte = writer
+ .get_next_byte_ptr(1)
+ .expect("get_next_byte_ptr() should return OK");
+ first_byte[0] = 0x10;
+ }
+ writer.put_aligned(42, 4);
+ let result = writer.consume();
+ assert_eq!(result.as_ref(), [0x10, 42, 0, 0, 0]);
+ }
+
+ #[test]
+ fn test_consume_flush_buffer() {
+ let mut writer1 = BitWriter::new(3);
+ let mut writer2 = BitWriter::new(3);
+ for i in 1..10 {
+ writer1.put_value(i, 4);
+ writer2.put_value(i, 4);
+ }
+ let res1 = writer1.flush_buffer();
+ let res2 = writer2.consume();
+ assert_eq!(res1, &res2[..]);
+ }
+
+ #[test]
+ fn test_put_get_bool() {
+ let len = 8;
+ let mut writer = BitWriter::new(len);
+
+ for i in 0..8 {
+ let result = writer.put_value(i % 2, 1);
+ assert!(result);
+ }
+
+ writer.flush();
+ {
+ let buffer = writer.buffer();
+ assert_eq!(buffer[0], 0b10101010);
+ }
+
+ // Write 00110011
+ for i in 0..8 {
+ let result = match i {
+ 0 | 1 | 4 | 5 => writer.put_value(false as u64, 1),
+ _ => writer.put_value(true as u64, 1),
+ };
+ assert!(result);
+ }
+ writer.flush();
+ {
+ let buffer = writer.buffer();
+ assert_eq!(buffer[0], 0b10101010);
+ assert_eq!(buffer[1], 0b11001100);
+ }
+
+ let mut reader = BitReader::from(writer.consume());
+
+ for i in 0..8 {
+ let val = reader
+ .get_value::<u8>(1)
+ .expect("get_value() should return OK");
+ assert_eq!(val, i % 2);
+ }
+
+ for i in 0..8 {
+ let val = reader
+ .get_value::<bool>(1)
+ .expect("get_value() should return OK");
+ match i {
+ 0 | 1 | 4 | 5 => assert!(!val),
+ _ => assert!(val),
+ }
+ }
+ }
+
+ #[test]
+ fn test_put_value_roundtrip() {
+ test_put_value_rand_numbers(32, 2);
+ test_put_value_rand_numbers(32, 3);
+ test_put_value_rand_numbers(32, 4);
+ test_put_value_rand_numbers(32, 5);
+ test_put_value_rand_numbers(32, 6);
+ test_put_value_rand_numbers(32, 7);
+ test_put_value_rand_numbers(32, 8);
+ test_put_value_rand_numbers(64, 16);
+ test_put_value_rand_numbers(64, 24);
+ test_put_value_rand_numbers(64, 32);
+ }
+
+ fn test_put_value_rand_numbers(total: usize, num_bits: usize) {
+ assert!(num_bits < 64);
+ let num_bytes = ceil(num_bits, 8);
+ let mut writer = BitWriter::new(num_bytes * total);
+ let values: Vec<u64> = random_numbers::<u64>(total)
+ .iter()
+ .map(|v| v & ((1 << num_bits) - 1))
+ .collect();
+ (0..total).for_each(|i| {
+ assert!(
+ writer.put_value(values[i], num_bits),
+ "[{}]: put_value() failed",
+ i
+ );
+ });
+
+ let mut reader = BitReader::from(writer.consume());
+ (0..total).for_each(|i| {
+ let v = reader
+ .get_value::<u64>(num_bits)
+ .expect("get_value() should return OK");
+ assert_eq!(
+ v, values[i],
+ "[{}]: expected {} but got {}",
+ i, values[i], v
+ );
+ });
+ }
+
+ #[test]
+ fn test_get_bits() {
+ const NUM_BYTES: usize = 100;
+
+ let mut vec = vec![0; NUM_BYTES];
+ let total_num_bits = NUM_BYTES * 8;
+ let v = random_bools(total_num_bits);
+ (0..total_num_bits).for_each(|i| {
+ if v[i] {
+ set_bit(&mut vec, i);
+ } else {
+ unset_bit(&mut vec, i);
+ }
+ });
+
+ let expected = vec.clone();
+
+ // test reading the first time from a buffer
+ for &(offset, num_bits) in [(0, 10), (2, 10), (8, 16), (25, 40), (7, 64)].iter() {
+ let mut reader = BitReader::from(vec.clone());
+ let mut buffer = vec![0; NUM_BYTES];
+
+ let actual_bits_read = reader.get_bits(&mut buffer, offset, num_bits);
+ let expected_bits_read = ::std::cmp::min(buffer.len() * 8 - offset, num_bits);
+ assert_eq!(expected_bits_read, actual_bits_read);
+
+ for i in 0..actual_bits_read {
+ assert_eq!(get_bit(&expected, i), get_bit(&buffer, offset + i));
+ }
+ }
+
+ // test reading consecutively from a buffer
+ let mut reader = BitReader::from(vec);
+ let mut buffer = vec![0; NUM_BYTES];
+ let mut rng = rand::thread_rng();
+ let mut bits_read = 0;
+
+ loop {
+ if bits_read >= total_num_bits {
+ break;
+ }
+ let n: usize = rng.gen();
+ let num_bits = n % 20;
+ bits_read += reader.get_bits(&mut buffer, bits_read, num_bits);
+ }
+
+ assert_eq!(total_num_bits, bits_read);
+ assert_eq!(&expected, &buffer);
+ }
+
+ #[test]
+ fn test_skip_bits() {
+ const NUM_BYTES: usize = 100;
+
+ let mut vec = vec![0; NUM_BYTES];
+ let total_num_bits = NUM_BYTES * 8;
+ let v = random_bools(total_num_bits);
+ (0..total_num_bits).for_each(|i| {
+ if v[i] {
+ set_bit(&mut vec, i);
+ } else {
+ unset_bit(&mut vec, i);
+ }
+ });
+
+ let expected = vec.clone();
+
+ // test skipping and check the next value
+ let mut reader = BitReader::from(vec);
+ let mut bits_read = 0;
+ for &num_bits in [10, 60, 8].iter() {
+ let actual_bits_read = reader.skip_bits(num_bits);
+ assert_eq!(num_bits, actual_bits_read);
+
+ bits_read += num_bits;
+ assert_eq!(Some(get_bit(&expected, bits_read)), reader.get_value(1));
+ bits_read += 1;
+ }
+
+ // test skipping consecutively
+ let mut rng = rand::thread_rng();
+ loop {
+ if bits_read >= total_num_bits {
+ break;
+ }
+ let n: usize = rng.gen();
+ let num_bits = n % 20;
+ bits_read += reader.skip_bits(num_bits);
+ }
+
+ assert_eq!(total_num_bits, bits_read);
+ }
+
+ #[test]
+ fn test_get_batch() {
+ const SIZE: &[usize] = &[1, 31, 32, 33, 128, 129];
+ for s in SIZE {
+ for i in 0..33 {
+ match i {
+ 0..=8 => test_get_batch_helper::<u8>(*s, i),
+ 9..=16 => test_get_batch_helper::<u16>(*s, i),
+ _ => test_get_batch_helper::<u32>(*s, i),
+ }
+ }
+ }
+ }
+
+ fn test_get_batch_helper<T>(total: usize, num_bits: usize)
+ where
+ T: FromBytes + Default + Clone + Debug + Eq,
+ {
+ assert!(num_bits <= 32);
+ let num_bytes = ceil(num_bits, 8);
+ let mut writer = BitWriter::new(num_bytes * total);
+
+ let values: Vec<u32> = random_numbers::<u32>(total)
+ .iter()
+ .map(|v| v & ((1u64 << num_bits) - 1) as u32)
+ .collect();
+
+ // Generic values used to check against actual values read from `get_batch`.
+ let expected_values: Vec<T> = values.iter().map(|v| from_ne_slice(v.as_bytes())).collect();
+
+ (0..total).for_each(|i| {
+ assert!(writer.put_value(values[i] as u64, num_bits));
+ });
+
+ let buf = writer.consume();
+ let mut reader = BitReader::from(buf);
+ let mut batch = vec![T::default(); values.len()];
+ let values_read = reader.get_batch::<T>(&mut batch, num_bits);
+ assert_eq!(values_read, values.len());
+ for i in 0..batch.len() {
+ assert_eq!(
+ batch[i], expected_values[i],
+ "num_bits = {}, index = {}",
+ num_bits, i
+ );
+ }
+ }
+
+ #[test]
+ fn test_get_u32_batch() {
+ const SIZE: &[usize] = &[1, 31, 32, 33, 128, 129];
+ for total in SIZE {
+ for num_bits in 1..33 {
+ let num_bytes = ceil(num_bits, 8);
+ let mut writer = BitWriter::new(num_bytes * total);
+
+ let values: Vec<u32> = random_numbers::<u32>(*total)
+ .iter()
+ .map(|v| v & ((1u64 << num_bits) - 1) as u32)
+ .collect();
+
+ (0..*total).for_each(|i| {
+ assert!(writer.put_value(values[i] as u64, num_bits));
+ });
+
+ let buf = writer.consume();
+ let mut reader = BitReader::from(buf);
+ let mut batch = vec![0u32; values.len()];
+ unsafe {
+ reader.get_u32_batch(batch.as_mut_ptr(), *total, num_bits);
+ }
+ for i in 0..batch.len() {
+ assert_eq!(
+ batch[i], values[i],
+ "num_bits = {}, index = {}",
+ num_bits, i
+ );
+ }
+ }
+ }
+ }
+
+ #[test]
+ fn test_put_aligned_roundtrip() {
+ test_put_aligned_rand_numbers::<u8>(4, 3);
+ test_put_aligned_rand_numbers::<u8>(16, 5);
+ test_put_aligned_rand_numbers::<i16>(32, 7);
+ test_put_aligned_rand_numbers::<i16>(32, 9);
+ test_put_aligned_rand_numbers::<i32>(32, 11);
+ test_put_aligned_rand_numbers::<i32>(32, 13);
+ test_put_aligned_rand_numbers::<i64>(32, 17);
+ test_put_aligned_rand_numbers::<i64>(32, 23);
+ }
+
+ fn test_put_aligned_rand_numbers<T>(total: usize, num_bits: usize)
+ where
+ T: Copy + FromBytes + AsBytes + Debug + PartialEq,
+ Standard: Distribution<T>,
+ {
+ assert!(num_bits <= 32);
+ assert!(total % 2 == 0);
+
+ let aligned_value_byte_width = std::mem::size_of::<T>();
+ let value_byte_width = ceil(num_bits, 8);
+ let mut writer =
+ BitWriter::new((total / 2) * (aligned_value_byte_width + value_byte_width));
+ let values: Vec