From 740397fddee96216e83545ac9dbcae798efa9b43 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 13 Jun 2024 07:34:08 -0400 Subject: [PATCH] Refine ParquetAccessPlan comments and tests --- .../src/datasource/physical_plan/parquet/access_plan.rs | 6 +++--- datafusion/core/src/datasource/physical_plan/parquet/mod.rs | 5 ++--- .../core/src/datasource/physical_plan/parquet/opener.rs | 2 ++ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/datafusion/core/src/datasource/physical_plan/parquet/access_plan.rs b/datafusion/core/src/datasource/physical_plan/parquet/access_plan.rs index f51f2c49e896..e15e907cd9b8 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/access_plan.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/access_plan.rs @@ -384,7 +384,7 @@ mod test { let access_plan = ParquetAccessPlan::new(vec![ RowGroupAccess::Scan, RowGroupAccess::Selection( - // select / skip all 20 rows in row group 1 + // specifies all 20 rows in row group 1 vec![ RowSelector::select(5), RowSelector::skip(7), @@ -463,7 +463,7 @@ mod test { fn test_invalid_too_few() { let access_plan = ParquetAccessPlan::new(vec![ RowGroupAccess::Scan, - // select 12 rows, but row group 1 has 20 + // specify only 12 rows in selection, but row group 1 has 20 RowGroupAccess::Selection( vec![RowSelector::select(5), RowSelector::skip(7)].into(), ), @@ -484,7 +484,7 @@ mod test { fn test_invalid_too_many() { let access_plan = ParquetAccessPlan::new(vec![ RowGroupAccess::Scan, - // select 22 rows, but row group 1 has only 20 + // specify 22 rows in selection, but row group 1 has only 20 RowGroupAccess::Selection( vec![ RowSelector::select(10), diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index 5e5cc93bc54f..ec21c5504c69 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -156,9 +156,8 @@ 
pub use writer::plan_to_parquet; /// used to implement external indexes on top of parquet files and select only /// portions of the files. /// -/// The `ParquetExec` will try and further reduce any provided -/// `ParquetAccessPlan` further based on the contents of `ParquetMetadata` and -/// other settings. +/// The `ParquetExec` will try to reduce any provided `ParquetAccessPlan` +/// further based on the contents of `ParquetMetadata` and other settings. /// /// ## Example of providing a ParquetAccessPlan /// diff --git a/datafusion/core/src/datasource/physical_plan/parquet/opener.rs b/datafusion/core/src/datasource/physical_plan/parquet/opener.rs index 8557c6d5f950..36335863032c 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/opener.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/opener.rs @@ -238,6 +238,8 @@ fn create_initial_plan( // check row group count matches the plan return Ok(access_plan.clone()); + } else { + debug!("ParquetExec Ignoring unknown extension specified for {file_name}"); } }