From 84380f997d0b5e2f43497801d9073edb0d655344 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Mon, 30 Aug 2021 16:53:20 +0200 Subject: [PATCH 1/4] fix: Allow parquet to be compiled without arrow `--no-default-features` is currently broken in the parquet crate due to arrow being required. With some small tweaks it can be made entirely optional. Added some extra steps to catch when `--no-default-features` does not work on CI as well. --- .github/workflows/rust.yml | 3 +++ parquet/Cargo.toml | 3 ++- parquet/src/data_type.rs | 3 +-- parquet/src/util/bit_util.rs | 9 +++++++++ parquet/src/util/mod.rs | 2 ++ 5 files changed, 17 insertions(+), 3 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index d76192c689cb..5f2f21ac853a 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -118,6 +118,9 @@ jobs: cargo run --example dynamic_types cargo run --example read_csv cargo run --example read_csv_infer_schema + (cd parquet && cargo check --no-default-features) + (cd arrow && cargo check --no-default-features) + (cd arrow-flight && cargo check --no-default-features) # test the --features "simd" of the arrow crate. This requires nightly. 
linux-test-simd: diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index 9a3a24534bcf..830e33505e52 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -60,6 +60,7 @@ serde_json = { version = "1.0", features = ["preserve_order"] } [features] default = ["arrow", "snap", "brotli", "flate2", "lz4", "zstd", "base64"] cli = ["serde_json", "base64", "clap"] +test = [] [[ bin ]] name = "parquet-read" @@ -79,4 +80,4 @@ harness = false [[bench]] name = "arrow_array_reader" -harness = false \ No newline at end of file +harness = false diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index dadcba1e23a8..8c64e8629463 100644 --- a/parquet/src/data_type.rs +++ b/parquet/src/data_type.rs @@ -585,10 +585,9 @@ impl AsBytes for str { pub(crate) mod private { use crate::encodings::decoding::PlainDecoderDetails; - use crate::util::bit_util::{BitReader, BitWriter}; + use crate::util::bit_util::{round_upto_power_of_2, BitReader, BitWriter}; use crate::util::memory::ByteBufferPtr; - use arrow::util::bit_util::round_upto_power_of_2; use byteorder::ByteOrder; use std::convert::TryInto; diff --git a/parquet/src/util/bit_util.rs b/parquet/src/util/bit_util.rs index 4b34df478065..010ed32c8c8b 100644 --- a/parquet/src/util/bit_util.rs +++ b/parquet/src/util/bit_util.rs @@ -680,6 +680,15 @@ impl From<Vec<u8>> for BitReader { } } +/// Returns the nearest multiple of `factor` that is `>=` than `num`. Here `factor` must +/// be a power of 2. 
+/// +/// Copied from the arrow crate to make arrow optional +pub fn round_upto_power_of_2(num: usize, factor: usize) -> usize { + debug_assert!(factor > 0 && (factor & (factor - 1)) == 0); + (num + (factor - 1)) & !(factor - 1) +} + #[cfg(test)] mod tests { use super::super::test_common::*; diff --git a/parquet/src/util/mod.rs b/parquet/src/util/mod.rs index 8f6d85d469e5..2c653ceef9b9 100644 --- a/parquet/src/util/mod.rs +++ b/parquet/src/util/mod.rs @@ -22,7 +22,9 @@ pub mod bit_util; mod bit_packing; pub mod cursor; pub mod hash_util; +#[cfg(feature = "test")] pub(crate) mod test_common; +#[cfg(feature = "test")] pub use self::test_common::page_util::{ DataPageBuilder, DataPageBuilderImpl, InMemoryPageIterator, }; From 782bcdf6d185628245a90d0f8b124fa37bf55c5a Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 31 Aug 2021 17:18:43 +0200 Subject: [PATCH 2/4] Fix CI --- parquet/Cargo.toml | 2 +- parquet/src/util/mod.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index 830e33505e52..0d1b4d775fac 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -60,7 +60,7 @@ serde_json = { version = "1.0", features = ["preserve_order"] } [features] default = ["arrow", "snap", "brotli", "flate2", "lz4", "zstd", "base64"] cli = ["serde_json", "base64", "clap"] -test = [] +test_common = [] [[ bin ]] name = "parquet-read" diff --git a/parquet/src/util/mod.rs b/parquet/src/util/mod.rs index 2c653ceef9b9..3a69df4360b2 100644 --- a/parquet/src/util/mod.rs +++ b/parquet/src/util/mod.rs @@ -22,9 +22,9 @@ pub mod bit_util; mod bit_packing; pub mod cursor; pub mod hash_util; -#[cfg(feature = "test")] +#[cfg(any(test, feature = "test_common"))] pub(crate) mod test_common; -#[cfg(feature = "test")] +#[cfg(any(test, feature = "test_common"))] pub use self::test_common::page_util::{ DataPageBuilder, DataPageBuilderImpl, InMemoryPageIterator, }; From 2a8c832979468e0bec0d7adef1fe853ee52d4651 Mon Sep 17 00:00:00 
2001 From: Markus Westerlind Date: Wed, 1 Sep 2021 10:57:44 +0200 Subject: [PATCH 3/4] Fix path on CI --- .github/workflows/rust.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 5f2f21ac853a..cfb343ecc107 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -118,6 +118,8 @@ jobs: cargo run --example dynamic_types cargo run --example read_csv cargo run --example read_csv_infer_schema + # Exit arrow directory + cd .. (cd parquet && cargo check --no-default-features) (cd arrow && cargo check --no-default-features) (cd arrow-flight && cargo check --no-default-features) From 1b9bafc08fb1c91fb21176806aa1079e4e83b584 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Wed, 1 Sep 2021 10:58:20 +0200 Subject: [PATCH 4/4] --features test_common is needed for clippy --- .github/workflows/rust.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index cfb343ecc107..2014faeb24b3 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -239,7 +239,7 @@ jobs: run: | export CARGO_HOME="/github/home/.cargo" export CARGO_TARGET_DIR="/github/home/target" - cargo clippy --all-targets --workspace -- -D warnings -A clippy::redundant_field_names + cargo clippy --features test_common --all-targets --workspace -- -D warnings -A clippy::redundant_field_names lint: name: Lint