From 5cd598a4e190deedc660ef0b1af8fd9b09f24f01 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 9 Sep 2024 13:39:06 -0400 Subject: [PATCH 01/28] testing FFI for table provider --- src/context.rs | 46 ++++++++- src/ffi.rs | 272 +++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 2 + 3 files changed, 315 insertions(+), 5 deletions(-) create mode 100644 src/ffi.rs diff --git a/src/context.rs b/src/context.rs index c2a263fa7..b45fcdb75 100644 --- a/src/context.rs +++ b/src/context.rs @@ -21,6 +21,7 @@ use std::str::FromStr; use std::sync::Arc; use arrow::array::RecordBatchReader; +use arrow::ffi::FFI_ArrowSchema; use arrow::ffi_stream::ArrowArrayStreamReader; use arrow::pyarrow::FromPyArrow; use datafusion::execution::session_state::SessionStateBuilder; @@ -36,6 +37,8 @@ use crate::dataframe::PyDataFrame; use crate::dataset::Dataset; use crate::errors::{py_datafusion_err, DataFusionError}; use crate::expr::sort_expr::PySortExpr; +use crate::expr::PyExpr; +use crate::ffi::FFI_TableProvider; use crate::physical_plan::PyExecutionPlan; use crate::record_batch::PyRecordBatchStream; use crate::sql::logical::PyLogicalPlan; @@ -54,11 +57,9 @@ use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::listing::{ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, }; -use datafusion::datasource::MemTable; use datafusion::datasource::TableProvider; -use datafusion::execution::context::{ - DataFilePaths, SQLOptions, SessionConfig, SessionContext, TaskContext, -}; +use datafusion::datasource::{provider, MemTable}; +use datafusion::execution::context::{DataFilePaths, SQLOptions, SessionConfig, SessionContext, TaskContext}; use datafusion::execution::disk_manager::DiskManagerConfig; use datafusion::execution::memory_pool::{FairSpillPool, GreedyMemoryPool, UnboundedMemoryPool}; use datafusion::execution::options::ReadOptions; @@ -67,7 +68,7 @@ use datafusion::physical_plan::SendableRecordBatchStream; use 
datafusion::prelude::{ AvroReadOptions, CsvReadOptions, DataFrame, NdJsonReadOptions, ParquetReadOptions, }; -use pyo3::types::{PyDict, PyList, PyTuple}; +use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple}; use tokio::task::JoinHandle; /// Configuration options for a SessionContext @@ -566,6 +567,41 @@ impl PySessionContext { Ok(()) } + /// Construct datafusion dataframe from Arrow Table + pub fn register_table_provider( + &mut self, + name: &str, + provider: Bound<'_, PyAny>, + py: Python, + ) -> PyResult<()> { + if provider.hasattr("__datafusion_table_provider__")? { + let capsule = provider.getattr("__datafusion_table_provider__")?.call0()?; + let capsule = capsule.downcast::()?; + // validate_pycapsule(capsule, "arrow_array_stream")?; + + let mut provider = unsafe { FFI_TableProvider::from_raw(capsule.pointer() as _) }; + + println!("Found provider version {}", provider.version); + + if let Some(s) = provider.schema { + let mut schema = FFI_ArrowSchema::empty(); + + let ret_code = unsafe { s(&mut provider, &mut schema) }; + + if ret_code == 0 { + let schema = Schema::try_from(&schema) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + println!("got schema {}", schema); + } else { + return Err(PyValueError::new_err(format!( + "Cannot get schema from input stream. 
Error code: {ret_code:?}" + ))); + } + } + } + Ok(()) + } + pub fn register_record_batches( &mut self, name: &str, diff --git a/src/ffi.rs b/src/ffi.rs new file mode 100644 index 000000000..b1301e6cc --- /dev/null +++ b/src/ffi.rs @@ -0,0 +1,272 @@ +use std::{ + ffi::{c_char, c_int, c_void, CStr, CString}, + ptr::addr_of, + sync::Arc, +}; + +use arrow::{error::ArrowError, ffi::FFI_ArrowSchema}; +use datafusion::common::Result; +use datafusion::{ + catalog::{Session, TableProvider}, + common::DFSchema, + execution::{context::SessionState, session_state::SessionStateBuilder}, + physical_plan::ExecutionPlan, + prelude::{Expr, SessionConfig}, +}; +use tokio::runtime::Runtime; + +#[repr(C)] +#[derive(Debug)] +#[allow(non_camel_case_types)] +pub enum FFI_Constraint { + /// Columns with the given indices form a composite primary key (they are + /// jointly unique and not nullable): + PrimaryKey(Vec), + /// Columns with the given indices form a composite unique key: + Unique(Vec), +} + +#[repr(C)] +#[derive(Debug)] +#[allow(missing_docs)] +#[allow(non_camel_case_types)] +pub struct FFI_ExecutionPlan { + pub private_data: *mut c_void, +} + +unsafe impl Send for FFI_ExecutionPlan {} + +struct ExecutionPlanPrivateData { + plan: Arc, + last_error: Option, +} + +#[repr(C)] +#[derive(Debug)] +#[allow(missing_docs)] +#[allow(non_camel_case_types)] +pub struct FFI_SessionConfig { + pub version: i64, + + pub private_data: *mut c_void, +} + +unsafe impl Send for FFI_SessionConfig {} + +struct SessionConfigPrivateData { + config: SessionConfig, + last_error: Option, +} + +struct ExportedSessionConfig { + session: *mut FFI_SessionConfig, +} + +impl ExportedSessionConfig { + fn get_private_data(&mut self) -> &mut SessionConfigPrivateData { + unsafe { &mut *((*self.session).private_data as *mut SessionConfigPrivateData) } + } +} + +#[repr(C)] +#[derive(Debug)] +#[allow(missing_docs)] +#[allow(non_camel_case_types)] +pub struct FFI_Expr {} + +#[repr(C)] +#[derive(Debug)] 
+#[allow(missing_docs)] +#[allow(non_camel_case_types)] +pub struct FFI_TableProvider { + pub version: i64, + pub schema: Option< + unsafe extern "C" fn(provider: *mut FFI_TableProvider, out: *mut FFI_ArrowSchema) -> c_int, + >, + pub scan: Option< + unsafe extern "C" fn( + provider: *mut FFI_TableProvider, + session_config: *mut FFI_SessionConfig, + n_projections: c_int, + projections: *mut c_int, + n_filters: c_int, + filters: *mut *const c_char, + limit: c_int, + out: *mut FFI_ExecutionPlan, + ) -> c_int, + >, + pub private_data: *mut c_void, +} + +unsafe impl Send for FFI_TableProvider {} + +struct ProviderPrivateData { + provider: Box, + last_error: Option, +} + +struct ExportedTableProvider { + provider: *mut FFI_TableProvider, +} + +// The callback used to get array schema +unsafe extern "C" fn provider_schema( + provider: *mut FFI_TableProvider, + schema: *mut FFI_ArrowSchema, +) -> c_int { + ExportedTableProvider { provider }.schema(schema) +} + +unsafe extern "C" fn provider_scan( + provider: *mut FFI_TableProvider, + session_config: *mut FFI_SessionConfig, + n_projections: c_int, + projections: *mut c_int, + n_filters: c_int, + filters: *mut *const c_char, + limit: c_int, + mut out: *mut FFI_ExecutionPlan, +) -> c_int { + let config = unsafe { (*session_config).private_data as *const SessionConfigPrivateData }; + let session = SessionStateBuilder::new() + .with_config((*config).config.clone()) + .build(); + + let num_projections: usize = n_projections.try_into().unwrap_or(0); + + let projections: Vec = std::slice::from_raw_parts(projections, num_projections) + .iter() + .filter_map(|v| (*v).try_into().ok()) + .collect(); + let maybe_projections = match projections.is_empty() { + true => None, + false => Some(&projections), + }; + + let filters_slice = std::slice::from_raw_parts(filters, n_filters as usize); + let filters_vec: Vec = filters_slice + .iter() + .map(|&s| CStr::from_ptr(s).to_string_lossy().to_string()) + .collect(); + + let limit = 
limit.try_into().ok(); + + let plan = + ExportedTableProvider { provider }.scan(&session, maybe_projections, filters_vec, limit); + + match plan { + Ok(mut plan) => { + out = &mut plan; + 0 + } + Err(_) => 1, + } +} + +impl ExportedTableProvider { + fn get_private_data(&mut self) -> &mut ProviderPrivateData { + unsafe { &mut *((*self.provider).private_data as *mut ProviderPrivateData) } + } + + pub fn schema(&mut self, out: *mut FFI_ArrowSchema) -> i32 { + let private_data = self.get_private_data(); + let provider = &private_data.provider; + + let schema = FFI_ArrowSchema::try_from(provider.schema().as_ref()); + + match schema { + Ok(schema) => { + unsafe { std::ptr::copy(addr_of!(schema), out, 1) }; + std::mem::forget(schema); + 0 + } + Err(ref err) => { + private_data.last_error = Some( + CString::new(err.to_string()).expect("Error string has a null byte in it."), + ); + get_error_code(err) + } + } + } + + pub fn scan( + &mut self, + session: &SessionState, + projections: Option<&Vec>, + filters: Vec, + limit: Option, + ) -> Result { + let private_data = self.get_private_data(); + let provider = &private_data.provider; + + let schema = provider.schema(); + let df_schema: DFSchema = schema.try_into()?; + + let filter_exprs = filters + .into_iter() + .map(|expr_str| session.create_logical_expr(&expr_str, &df_schema)) + .collect::>>()?; + + let runtime = Runtime::new().unwrap(); + let plan = runtime.block_on(provider.scan(session, projections, &filter_exprs, limit))?; + + let plan_ptr = Box::new(ExecutionPlanPrivateData { + plan, + last_error: None, + }); + + Ok(FFI_ExecutionPlan { + private_data: Box::into_raw(plan_ptr) as *mut c_void, + }) + } +} + +const ENOMEM: i32 = 12; +const EIO: i32 = 5; +const EINVAL: i32 = 22; +const ENOSYS: i32 = 78; + +fn get_error_code(err: &ArrowError) -> i32 { + match err { + ArrowError::NotYetImplemented(_) => ENOSYS, + ArrowError::MemoryError(_) => ENOMEM, + ArrowError::IoError(_, _) => EIO, + _ => EINVAL, + } +} + +impl 
FFI_TableProvider { + /// Creates a new [`FFI_TableProvider`]. + pub fn new(provider: Box) -> Self { + let private_data = Box::new(ProviderPrivateData { + provider, + last_error: None, + }); + + Self { + version: 2, + schema: Some(provider_schema), + scan: Some(provider_scan), + private_data: Box::into_raw(private_data) as *mut c_void, + } + } + + /** + Replace temporary pointer with updated + # Safety + User must validate the raw pointer is valid. + */ + pub unsafe fn from_raw(raw_provider: *mut FFI_TableProvider) -> Self { + std::ptr::replace(raw_provider, Self::empty()) + } + + /// Creates a new empty [FFI_ArrowArrayStream]. Used to import from the C Stream Interface. + pub fn empty() -> Self { + Self { + version: 0, + schema: None, + scan: None, + private_data: std::ptr::null_mut(), + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 0b57e0999..ce737f7a8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -61,6 +61,8 @@ mod udf; mod udwf; pub mod utils; +pub mod ffi; + #[cfg(feature = "mimalloc")] #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; From 7a476d2a5570fbebc20861df7198713b1c491766 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 9 Sep 2024 16:40:53 -0400 Subject: [PATCH 02/28] Was able to get round trip schema from datafusion -> delta table -> datafusion --- src/context.rs | 31 +++++------ src/ffi.rs | 138 +++++++++++++++++++++++++++++++++++++------------ 2 files changed, 122 insertions(+), 47 deletions(-) diff --git a/src/context.rs b/src/context.rs index b45fcdb75..b4ba6b229 100644 --- a/src/context.rs +++ b/src/context.rs @@ -583,21 +583,22 @@ impl PySessionContext { println!("Found provider version {}", provider.version); - if let Some(s) = provider.schema { - let mut schema = FFI_ArrowSchema::empty(); - - let ret_code = unsafe { s(&mut provider, &mut schema) }; - - if ret_code == 0 { - let schema = Schema::try_from(&schema) - .map_err(|e| PyValueError::new_err(e.to_string()))?; - println!("got schema {}", schema); - } else { - return 
Err(PyValueError::new_err(format!( - "Cannot get schema from input stream. Error code: {ret_code:?}" - ))); - } - } + let schema = provider.schema(); + println!("Got schema through TableProvider trait {}", schema); + + // if let Some(s) = provider.schema { + // let mut schema = s(provider); + + // if ret_code == 0 { + // let schema = Schema::try_from(&schema) + // .map_err(|e| PyValueError::new_err(e.to_string()))?; + // println!("got schema {}", schema); + // } else { + // return Err(PyValueError::new_err(format!( + // "Cannot get schema from input stream. Error code: {ret_code:?}" + // ))); + // } + // } } Ok(()) } diff --git a/src/ffi.rs b/src/ffi.rs index b1301e6cc..d7d6d06ef 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -1,11 +1,16 @@ use std::{ + any::Any, ffi::{c_char, c_int, c_void, CStr, CString}, - ptr::addr_of, + ptr::{addr_of, addr_of_mut}, sync::Arc, }; -use arrow::{error::ArrowError, ffi::FFI_ArrowSchema}; -use datafusion::common::Result; +use arrow::{ + datatypes::{Schema, SchemaRef}, + error::ArrowError, + ffi::FFI_ArrowSchema, +}; +use async_trait::async_trait; use datafusion::{ catalog::{Session, TableProvider}, common::DFSchema, @@ -13,6 +18,9 @@ use datafusion::{ physical_plan::ExecutionPlan, prelude::{Expr, SessionConfig}, }; +use datafusion::{ + common::Result, datasource::TableType, logical_expr::TableProviderFilterPushDown, +}; use tokio::runtime::Runtime; #[repr(C)] @@ -80,9 +88,7 @@ pub struct FFI_Expr {} #[allow(non_camel_case_types)] pub struct FFI_TableProvider { pub version: i64, - pub schema: Option< - unsafe extern "C" fn(provider: *mut FFI_TableProvider, out: *mut FFI_ArrowSchema) -> c_int, - >, + pub schema: Option FFI_ArrowSchema>, pub scan: Option< unsafe extern "C" fn( provider: *mut FFI_TableProvider, @@ -99,6 +105,7 @@ pub struct FFI_TableProvider { } unsafe impl Send for FFI_TableProvider {} +unsafe impl Sync for FFI_TableProvider {} struct ProviderPrivateData { provider: Box, @@ -108,13 +115,14 @@ struct ProviderPrivateData { 
struct ExportedTableProvider { provider: *mut FFI_TableProvider, } +struct ConstExportedTableProvider { + provider: *const FFI_TableProvider, +} // The callback used to get array schema -unsafe extern "C" fn provider_schema( - provider: *mut FFI_TableProvider, - schema: *mut FFI_ArrowSchema, -) -> c_int { - ExportedTableProvider { provider }.schema(schema) +unsafe extern "C" fn provider_schema(provider: *const FFI_TableProvider) -> FFI_ArrowSchema { + println!("callback function"); + ConstExportedTableProvider { provider }.provider_schema() } unsafe extern "C" fn provider_scan( @@ -151,8 +159,12 @@ unsafe extern "C" fn provider_scan( let limit = limit.try_into().ok(); - let plan = - ExportedTableProvider { provider }.scan(&session, maybe_projections, filters_vec, limit); + let plan = ExportedTableProvider { provider }.provider_scan( + &session, + maybe_projections, + filters_vec, + limit, + ); match plan { Ok(mut plan) => { @@ -163,33 +175,33 @@ unsafe extern "C" fn provider_scan( } } -impl ExportedTableProvider { - fn get_private_data(&mut self) -> &mut ProviderPrivateData { - unsafe { &mut *((*self.provider).private_data as *mut ProviderPrivateData) } +impl ConstExportedTableProvider { + fn get_private_data(&self) -> &ProviderPrivateData { + unsafe { &*((*self.provider).private_data as *const ProviderPrivateData) } } - pub fn schema(&mut self, out: *mut FFI_ArrowSchema) -> i32 { + pub fn provider_schema(&self) -> FFI_ArrowSchema { + println!("Enter exported table provider"); let private_data = self.get_private_data(); let provider = &private_data.provider; - let schema = FFI_ArrowSchema::try_from(provider.schema().as_ref()); + println!("about to try from in provider.schema()"); + // This does silently fail because TableProvider does not return a result + // so we expect it to always pass. Maybe some logging should be added. 
+ let mut schema = FFI_ArrowSchema::try_from(provider.schema().as_ref()) + .unwrap_or(FFI_ArrowSchema::empty()); - match schema { - Ok(schema) => { - unsafe { std::ptr::copy(addr_of!(schema), out, 1) }; - std::mem::forget(schema); - 0 - } - Err(ref err) => { - private_data.last_error = Some( - CString::new(err.to_string()).expect("Error string has a null byte in it."), - ); - get_error_code(err) - } - } + println!("Found the schema but can we return it?"); + schema } +} - pub fn scan( +impl ExportedTableProvider { + fn get_private_data(&mut self) -> &mut ProviderPrivateData { + unsafe { &mut *((*self.provider).private_data as *mut ProviderPrivateData) } + } + + pub fn provider_scan( &mut self, session: &SessionState, projections: Option<&Vec>, @@ -270,3 +282,65 @@ impl FFI_TableProvider { } } } + +#[async_trait] +impl TableProvider for FFI_TableProvider { + /// Returns the table provider as [`Any`](std::any::Any) so that it can be + /// downcast to a specific implementation. + fn as_any(&self) -> &dyn Any { + self + } + + /// Get a reference to the schema for this table + fn schema(&self) -> SchemaRef { + let schema = match self.schema { + Some(func) => { + println!("About to call the function to get the schema"); + unsafe { + let v = func(self); + println!("Got the mutalbe ffi_arrow_schmea?"); + // func(self).as_ref().and_then(|s| Schema::try_from(s).ok()) + Schema::try_from(&func(self)).ok() + } + } + None => None, + }; + Arc::new(schema.unwrap_or(Schema::empty())) + } + + /// Get the type of this table for metadata/catalog purposes. + fn table_type(&self) -> TableType { + TableType::Base + } + + /// Create an ExecutionPlan that will scan the table. + /// The table provider will be usually responsible of grouping + /// the source data into partitions that can be efficiently + /// parallelized or distributed. 
+ async fn scan( + &self, + _ctx: &dyn Session, + projection: Option<&Vec>, + filters: &[Expr], + // limit can be used to reduce the amount scanned + // from the datasource as a performance optimization. + // If set, it contains the amount of rows needed by the `LogicalPlan`, + // The datasource should return *at least* this number of rows if available. + _limit: Option, + ) -> Result> { + Err(datafusion::error::DataFusionError::NotImplemented( + "scan not implemented".to_string(), + )) + } + + /// Tests whether the table provider can make use of a filter expression + /// to optimise data retrieval. + fn supports_filters_pushdown( + &self, + filter: &[&Expr], + ) -> Result> { + Err(datafusion::error::DataFusionError::NotImplemented( + "support filter pushdown not implemented".to_string(), + )) + } +} From f8300585adc95cb9c568ff4e444c6aeafd0c8608 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 9 Sep 2024 18:05:42 -0400 Subject: [PATCH 03/28] Expand file structure --- src/context.rs | 18 +---- src/ffi/execution_plan.rs | 21 ++++++ src/ffi/mod.rs | 20 ++++++ src/ffi/session_config.rs | 30 +++++++++ src/{ffi.rs => ffi/table_provider.rs} | 95 +++++---------------------- 5 files changed, 89 insertions(+), 95 deletions(-) create mode 100644 src/ffi/execution_plan.rs create mode 100644 src/ffi/mod.rs create mode 100644 src/ffi/session_config.rs rename src/{ffi.rs => ffi/table_provider.rs} (77%) diff --git a/src/context.rs b/src/context.rs index b4ba6b229..2671ff40f 100644 --- a/src/context.rs +++ b/src/context.rs @@ -38,7 +38,7 @@ use crate::dataset::Dataset; use crate::errors::{py_datafusion_err, DataFusionError}; use crate::expr::sort_expr::PySortExpr; use crate::expr::PyExpr; -use crate::ffi::FFI_TableProvider; +use crate::ffi::table_provider::FFI_TableProvider; use crate::physical_plan::PyExecutionPlan; use crate::record_batch::PyRecordBatchStream; use crate::sql::logical::PyLogicalPlan; @@ -579,26 +579,14 @@ impl PySessionContext { let capsule = 
capsule.downcast::()?; // validate_pycapsule(capsule, "arrow_array_stream")?; - let mut provider = unsafe { FFI_TableProvider::from_raw(capsule.pointer() as _) }; + let provider = unsafe { FFI_TableProvider::from_raw(capsule.pointer() as _) }; println!("Found provider version {}", provider.version); let schema = provider.schema(); println!("Got schema through TableProvider trait {}", schema); - // if let Some(s) = provider.schema { - // let mut schema = s(provider); - - // if ret_code == 0 { - // let schema = Schema::try_from(&schema) - // .map_err(|e| PyValueError::new_err(e.to_string()))?; - // println!("got schema {}", schema); - // } else { - // return Err(PyValueError::new_err(format!( - // "Cannot get schema from input stream. Error code: {ret_code:?}" - // ))); - // } - // } + let _ = self.ctx.register_table(name, Arc::new(provider))?; } Ok(()) } diff --git a/src/ffi/execution_plan.rs b/src/ffi/execution_plan.rs new file mode 100644 index 000000000..777dc0888 --- /dev/null +++ b/src/ffi/execution_plan.rs @@ -0,0 +1,21 @@ +use std::{ + ffi::{c_void, CString}, + sync::Arc, +}; + +use datafusion::physical_plan::ExecutionPlan; + +#[repr(C)] +#[derive(Debug)] +#[allow(missing_docs)] +#[allow(non_camel_case_types)] +pub struct FFI_ExecutionPlan { + pub private_data: *mut c_void, +} + +unsafe impl Send for FFI_ExecutionPlan {} + +pub struct ExecutionPlanPrivateData { + pub plan: Arc, + pub last_error: Option, +} diff --git a/src/ffi/mod.rs b/src/ffi/mod.rs new file mode 100644 index 000000000..57bfc97eb --- /dev/null +++ b/src/ffi/mod.rs @@ -0,0 +1,20 @@ +pub mod execution_plan; +pub mod session_config; +pub mod table_provider; + +#[repr(C)] +#[derive(Debug)] +#[allow(non_camel_case_types)] +pub enum FFI_Constraint { + /// Columns with the given indices form a composite primary key (they are + /// jointly unique and not nullable): + PrimaryKey(Vec), + /// Columns with the given indices form a composite unique key: + Unique(Vec), +} + +#[repr(C)] +#[derive(Debug)] 
+#[allow(missing_docs)] +#[allow(non_camel_case_types)] +pub struct FFI_Expr {} diff --git a/src/ffi/session_config.rs b/src/ffi/session_config.rs new file mode 100644 index 000000000..becd12645 --- /dev/null +++ b/src/ffi/session_config.rs @@ -0,0 +1,30 @@ +use std::ffi::{c_void, CString}; + +use datafusion::prelude::SessionConfig; + +#[repr(C)] +#[derive(Debug)] +#[allow(missing_docs)] +#[allow(non_camel_case_types)] +pub struct FFI_SessionConfig { + pub version: i64, + + pub private_data: *mut c_void, +} + +unsafe impl Send for FFI_SessionConfig {} + +pub struct SessionConfigPrivateData { + pub config: SessionConfig, + pub last_error: Option, +} + +struct ExportedSessionConfig { + session: *mut FFI_SessionConfig, +} + +impl ExportedSessionConfig { + fn get_private_data(&mut self) -> &mut SessionConfigPrivateData { + unsafe { &mut *((*self.session).private_data as *mut SessionConfigPrivateData) } + } +} diff --git a/src/ffi.rs b/src/ffi/table_provider.rs similarity index 77% rename from src/ffi.rs rename to src/ffi/table_provider.rs index d7d6d06ef..593660983 100644 --- a/src/ffi.rs +++ b/src/ffi/table_provider.rs @@ -1,7 +1,6 @@ use std::{ any::Any, ffi::{c_char, c_int, c_void, CStr, CString}, - ptr::{addr_of, addr_of_mut}, sync::Arc, }; @@ -14,73 +13,20 @@ use async_trait::async_trait; use datafusion::{ catalog::{Session, TableProvider}, common::DFSchema, + datasource::TableType, + error::DataFusionError, execution::{context::SessionState, session_state::SessionStateBuilder}, + logical_expr::TableProviderFilterPushDown, physical_plan::ExecutionPlan, - prelude::{Expr, SessionConfig}, -}; -use datafusion::{ - common::Result, datasource::TableType, logical_expr::TableProviderFilterPushDown, + prelude::Expr, }; use tokio::runtime::Runtime; -#[repr(C)] -#[derive(Debug)] -#[allow(non_camel_case_types)] -pub enum FFI_Constraint { - /// Columns with the given indices form a composite primary key (they are - /// jointly unique and not nullable): - PrimaryKey(Vec), - /// 
Columns with the given indices form a composite unique key: - Unique(Vec), -} - -#[repr(C)] -#[derive(Debug)] -#[allow(missing_docs)] -#[allow(non_camel_case_types)] -pub struct FFI_ExecutionPlan { - pub private_data: *mut c_void, -} - -unsafe impl Send for FFI_ExecutionPlan {} - -struct ExecutionPlanPrivateData { - plan: Arc, - last_error: Option, -} - -#[repr(C)] -#[derive(Debug)] -#[allow(missing_docs)] -#[allow(non_camel_case_types)] -pub struct FFI_SessionConfig { - pub version: i64, - - pub private_data: *mut c_void, -} - -unsafe impl Send for FFI_SessionConfig {} - -struct SessionConfigPrivateData { - config: SessionConfig, - last_error: Option, -} - -struct ExportedSessionConfig { - session: *mut FFI_SessionConfig, -} - -impl ExportedSessionConfig { - fn get_private_data(&mut self) -> &mut SessionConfigPrivateData { - unsafe { &mut *((*self.session).private_data as *mut SessionConfigPrivateData) } - } -} - -#[repr(C)] -#[derive(Debug)] -#[allow(missing_docs)] -#[allow(non_camel_case_types)] -pub struct FFI_Expr {} +use super::{ + execution_plan::{ExecutionPlanPrivateData, FFI_ExecutionPlan}, + session_config::{FFI_SessionConfig, SessionConfigPrivateData}, +}; +use datafusion::error::Result; #[repr(C)] #[derive(Debug)] @@ -121,7 +67,6 @@ struct ConstExportedTableProvider { // The callback used to get array schema unsafe extern "C" fn provider_schema(provider: *const FFI_TableProvider) -> FFI_ArrowSchema { - println!("callback function"); ConstExportedTableProvider { provider }.provider_schema() } @@ -181,18 +126,12 @@ impl ConstExportedTableProvider { } pub fn provider_schema(&self) -> FFI_ArrowSchema { - println!("Enter exported table provider"); let private_data = self.get_private_data(); let provider = &private_data.provider; - println!("about to try from in provider.schema()"); // This does silently fail because TableProvider does not return a result // so we expect it to always pass. Maybe some logging should be added. 
- let mut schema = FFI_ArrowSchema::try_from(provider.schema().as_ref()) - .unwrap_or(FFI_ArrowSchema::empty()); - - println!("Found the schema but can we return it?"); - schema + FFI_ArrowSchema::try_from(provider.schema().as_ref()).unwrap_or(FFI_ArrowSchema::empty()) } } @@ -294,15 +233,7 @@ impl TableProvider for FFI_TableProvider { /// Get a reference to the schema for this table fn schema(&self) -> SchemaRef { let schema = match self.schema { - Some(func) => { - println!("About to call the function to get the schema"); - unsafe { - let v = func(self); - println!("Got the mutalbe ffi_arrow_schmea?"); - // func(self).as_ref().and_then(|s| Schema::try_from(s).ok()) - Schema::try_from(&func(self)).ok() - } - } + Some(func) => unsafe { Schema::try_from(&func(self)).ok() }, None => None, }; Arc::new(schema.unwrap_or(Schema::empty())) @@ -328,6 +259,10 @@ impl TableProvider for FFI_TableProvider { // The datasource should return *at least* this number of rows if available. _limit: Option, ) -> Result> { + let scan_fn = self.scan.ok_or(DataFusionError::NotImplemented( + "Scan not defined on FFI_TableProvider".to_string(), + ))?; + Err(datafusion::error::DataFusionError::NotImplemented( "scan not implemented".to_string(), )) From eec590fe49c310528f5a31b7031079968c54ba0e Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 9 Sep 2024 19:55:41 -0400 Subject: [PATCH 04/28] WIP on execution plan --- src/ffi/execution_plan.rs | 88 ++++++++++++++++++++++++++++++++++++++- src/ffi/session_config.rs | 20 ++++++++- src/ffi/table_provider.rs | 71 ++++++++++++++++++------------- 3 files changed, 147 insertions(+), 32 deletions(-) diff --git a/src/ffi/execution_plan.rs b/src/ffi/execution_plan.rs index 777dc0888..a39790329 100644 --- a/src/ffi/execution_plan.rs +++ b/src/ffi/execution_plan.rs @@ -3,19 +3,105 @@ use std::{ sync::Arc, }; -use datafusion::physical_plan::ExecutionPlan; +use datafusion::{physical_expr::{EquivalenceProperties, LexOrdering}, physical_plan::{DisplayAs, 
ExecutionMode, ExecutionPlan, Partitioning, PlanProperties}}; #[repr(C)] #[derive(Debug)] #[allow(missing_docs)] #[allow(non_camel_case_types)] pub struct FFI_ExecutionPlan { + pub properties: Option FFI_ArrowSchema>, + pub private_data: *mut c_void, } unsafe impl Send for FFI_ExecutionPlan {} +unsafe impl Sync for FFI_ExecutionPlan {} pub struct ExecutionPlanPrivateData { pub plan: Arc, pub last_error: Option, } + +struct ExportedExecutionPlan(*const FFI_ExecutionPlan); + +impl FFI_ExecutionPlan { + + pub fn empty() -> Self { + Self { + private_data: std::ptr::null_mut(), + } + } +} + +impl FFI_ExecutionPlan { + pub fn new(plan: Arc) -> Self { + let private_data = Box::new(ExecutionPlanPrivateData { + plan, + last_error: None, + }); + + Self { + private_data: Box::into_raw(private_data) as *mut c_void + } + } +} + +impl ExecutionPlan for FFI_ExecutionPlan { + fn name(&self) -> &str { + todo!() + } + + fn as_any(&self) -> &dyn std::any::Any { + todo!() + } + + fn properties(&self) -> &datafusion::physical_plan::PlanProperties { + self.properties + } + + fn children(&self) -> Vec<&Arc> { + todo!() + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> datafusion::error::Result> { + todo!() + } + + fn execute( + &self, + partition: usize, + context: Arc, + ) -> datafusion::error::Result { + todo!() + } +} + +impl DisplayAs for FFI_ExecutionPlan { + fn fmt_as(&self, t: datafusion::physical_plan::DisplayFormatType, f: &mut std::fmt::Formatter) -> std::fmt::Result { + todo!() + } +} + + + + + + +#[repr(C)] +#[derive(Debug)] +#[allow(missing_docs)] +#[allow(non_camel_case_types)] +pub struct FFI_PlanProperties { + /// See [ExecutionPlanProperties::equivalence_properties] + pub eq_properties: EquivalenceProperties, + /// See [ExecutionPlanProperties::output_partitioning] + pub partitioning: Partitioning, + /// See [ExecutionPlanProperties::execution_mode] + pub execution_mode: ExecutionMode, + /// See [ExecutionPlanProperties::output_ordering] + 
output_ordering: Option, +} \ No newline at end of file diff --git a/src/ffi/session_config.rs b/src/ffi/session_config.rs index becd12645..ab81e0bc9 100644 --- a/src/ffi/session_config.rs +++ b/src/ffi/session_config.rs @@ -1,6 +1,6 @@ -use std::ffi::{c_void, CString}; +use std::{ffi::{c_void, CString}, sync::Arc}; -use datafusion::prelude::SessionConfig; +use datafusion::{catalog::Session, prelude::SessionConfig}; #[repr(C)] #[derive(Debug)] @@ -28,3 +28,19 @@ impl ExportedSessionConfig { unsafe { &mut *((*self.session).private_data as *mut SessionConfigPrivateData) } } } + +impl FFI_SessionConfig { + /// Creates a new [`FFI_TableProvider`]. + pub fn new(session: &dyn Session) -> Self { + let config = session.config().clone(); + let private_data = Box::new(SessionConfigPrivateData { + config, + last_error: None, + }); + + Self { + version: 2, + private_data: Box::into_raw(private_data) as *mut c_void, + } + } +} \ No newline at end of file diff --git a/src/ffi/table_provider.rs b/src/ffi/table_provider.rs index 593660983..90e5f1084 100644 --- a/src/ffi/table_provider.rs +++ b/src/ffi/table_provider.rs @@ -37,12 +37,12 @@ pub struct FFI_TableProvider { pub schema: Option FFI_ArrowSchema>, pub scan: Option< unsafe extern "C" fn( - provider: *mut FFI_TableProvider, - session_config: *mut FFI_SessionConfig, + provider: *const FFI_TableProvider, + session_config: *const FFI_SessionConfig, n_projections: c_int, - projections: *mut c_int, + projections: *const c_int, n_filters: c_int, - filters: *mut *const c_char, + filters: *const *const c_char, limit: c_int, out: *mut FFI_ExecutionPlan, ) -> c_int, @@ -58,25 +58,20 @@ struct ProviderPrivateData { last_error: Option, } -struct ExportedTableProvider { - provider: *mut FFI_TableProvider, -} -struct ConstExportedTableProvider { - provider: *const FFI_TableProvider, -} +struct ExportedTableProvider(*const FFI_TableProvider); // The callback used to get array schema unsafe extern "C" fn provider_schema(provider: *const 
FFI_TableProvider) -> FFI_ArrowSchema { - ConstExportedTableProvider { provider }.provider_schema() + ExportedTableProvider(provider).provider_schema() } unsafe extern "C" fn provider_scan( - provider: *mut FFI_TableProvider, - session_config: *mut FFI_SessionConfig, + provider: *const FFI_TableProvider, + session_config: *const FFI_SessionConfig, n_projections: c_int, - projections: *mut c_int, + projections: *const c_int, n_filters: c_int, - filters: *mut *const c_char, + filters: *const *const c_char, limit: c_int, mut out: *mut FFI_ExecutionPlan, ) -> c_int { @@ -104,7 +99,7 @@ unsafe extern "C" fn provider_scan( let limit = limit.try_into().ok(); - let plan = ExportedTableProvider { provider }.provider_scan( + let plan = ExportedTableProvider(provider).provider_scan( &session, maybe_projections, filters_vec, @@ -120,9 +115,9 @@ unsafe extern "C" fn provider_scan( } } -impl ConstExportedTableProvider { +impl ExportedTableProvider { fn get_private_data(&self) -> &ProviderPrivateData { - unsafe { &*((*self.provider).private_data as *const ProviderPrivateData) } + unsafe { &*((*self.0).private_data as *const ProviderPrivateData) } } pub fn provider_schema(&self) -> FFI_ArrowSchema { @@ -133,12 +128,6 @@ impl ConstExportedTableProvider { // so we expect it to always pass. Maybe some logging should be added. FFI_ArrowSchema::try_from(provider.schema().as_ref()).unwrap_or(FFI_ArrowSchema::empty()) } -} - -impl ExportedTableProvider { - fn get_private_data(&mut self) -> &mut ProviderPrivateData { - unsafe { &mut *((*self.provider).private_data as *mut ProviderPrivateData) } - } pub fn provider_scan( &mut self, @@ -250,22 +239,46 @@ impl TableProvider for FFI_TableProvider { /// parallelized or distributed. async fn scan( &self, - _ctx: &dyn Session, + session: &dyn Session, projection: Option<&Vec>, filters: &[Expr], // limit can be used to reduce the amount scanned // from the datasource as a performance optimization. 
// If set, it contains the amount of rows needed by the `LogicalPlan`, // The datasource should return *at least* this number of rows if available. - _limit: Option, + limit: Option, ) -> Result> { let scan_fn = self.scan.ok_or(DataFusionError::NotImplemented( "Scan not defined on FFI_TableProvider".to_string(), ))?; - Err(datafusion::error::DataFusionError::NotImplemented( - "scan not implemented".to_string(), - )) + let session_config = FFI_SessionConfig::new(session); + + let n_projections = projection.map(|p| p.len()).unwrap_or(0) as c_int; + let projections: Vec = projection.map(|p| p.iter().map(|v| *v as c_int).collect()).unwrap_or_default(); + let projections_ptr = projections.as_ptr(); + + let n_filters = filters.len() as c_int; + let filters: Vec = filters.iter().filter_map(|f| CString::new(f.to_string()).ok()).collect(); + let filters_ptr: Vec<*const i8> = filters.iter() + .map(|s| s.as_ptr()) + .collect(); + + let limit = match limit { + Some(l) => l as c_int, + None => -1, + }; + + let mut out = FFI_ExecutionPlan::empty(); + + let err_code = unsafe { + scan_fn(self, &session_config, n_projections, projections_ptr, n_filters, filters_ptr.as_ptr(), limit, &mut out) + }; + + match err_code { + 0 => Ok(Arc::new(out)), + _ => Err(datafusion::error::DataFusionError::Internal("Unable to perform scan via FFI".to_string())) + } } /// Tests whether the table provider can make use of a filter expression From f7e7acab2812b2334dc9772f03162125b39defdc Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 5 Oct 2024 10:28:01 -0400 Subject: [PATCH 05/28] Working through execution plan FFI --- src/context.rs | 4 +- src/ffi/execution_plan.rs | 154 ++++++++++++++++++++++++++++++++------ src/ffi/session_config.rs | 7 +- src/ffi/table_provider.rs | 49 ++++++++---- 4 files changed, 173 insertions(+), 41 deletions(-) diff --git a/src/context.rs b/src/context.rs index 2671ff40f..2e9ac95c0 100644 --- a/src/context.rs +++ b/src/context.rs @@ -59,7 +59,9 @@ use 
datafusion::datasource::listing::{ }; use datafusion::datasource::TableProvider; use datafusion::datasource::{provider, MemTable}; -use datafusion::execution::context::{DataFilePaths, SQLOptions, SessionConfig, SessionContext, TaskContext}; +use datafusion::execution::context::{ + DataFilePaths, SQLOptions, SessionConfig, SessionContext, TaskContext, +}; use datafusion::execution::disk_manager::DiskManagerConfig; use datafusion::execution::memory_pool::{FairSpillPool, GreedyMemoryPool, UnboundedMemoryPool}; use datafusion::execution::options::ReadOptions; diff --git a/src/ffi/execution_plan.rs b/src/ffi/execution_plan.rs index a39790329..0603b64bf 100644 --- a/src/ffi/execution_plan.rs +++ b/src/ffi/execution_plan.rs @@ -1,36 +1,84 @@ use std::{ ffi::{c_void, CString}, + ptr::null, sync::Arc, }; -use datafusion::{physical_expr::{EquivalenceProperties, LexOrdering}, physical_plan::{DisplayAs, ExecutionMode, ExecutionPlan, Partitioning, PlanProperties}}; +use datafusion::error::Result; +use datafusion::{ + error::DataFusionError, + parquet::file::properties, + physical_expr::{EquivalenceProperties, LexOrdering}, + physical_plan::{DisplayAs, ExecutionMode, ExecutionPlan, Partitioning, PlanProperties}, +}; #[repr(C)] #[derive(Debug)] #[allow(missing_docs)] #[allow(non_camel_case_types)] pub struct FFI_ExecutionPlan { - pub properties: Option FFI_ArrowSchema>, + pub properties: + Option FFI_PlanProperties>, + pub children: Option< + unsafe extern "C" fn( + plan: *const FFI_ExecutionPlan, + num_children: &mut usize, + out: &mut *const FFI_ExecutionPlan, + ) -> i32, + >, pub private_data: *mut c_void, } -unsafe impl Send for FFI_ExecutionPlan {} -unsafe impl Sync for FFI_ExecutionPlan {} - pub struct ExecutionPlanPrivateData { pub plan: Arc, pub last_error: Option, } -struct ExportedExecutionPlan(*const FFI_ExecutionPlan); +unsafe extern "C" fn properties_fn_wrapper(plan: *const FFI_ExecutionPlan) -> FFI_PlanProperties { + let private_data = (*plan).private_data as *const 
ExecutionPlanPrivateData; + let properties = (*private_data).plan.properties(); + properties.into() +} -impl FFI_ExecutionPlan { +unsafe extern "C" fn children_fn_wrapper( + plan: *const FFI_ExecutionPlan, + num_children: &mut usize, + out: &mut *const FFI_ExecutionPlan, +) -> i32 { + let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; + + let children = (*private_data).plan.children(); + *num_children = children.len(); + let children: Vec = children + .into_iter() + .map(|child| FFI_ExecutionPlan::new(child.clone())) + .collect(); + *out = children.as_ptr(); + + 0 +} - pub fn empty() -> Self { - Self { - private_data: std::ptr::null_mut(), - } +// Since the trait ExecutionPlan requires borrowed values, we wrap our FFI. +// This struct exists on the consumer side (datafusion-python, for example) and not +// in the provider's side. +#[derive(Debug)] +pub struct ExportedExecutionPlan { + plan: *const FFI_ExecutionPlan, + properties: PlanProperties, + children: Vec>, +} + +unsafe impl Send for ExportedExecutionPlan {} +unsafe impl Sync for ExportedExecutionPlan {} + +impl DisplayAs for ExportedExecutionPlan { + fn fmt_as( + &self, + t: datafusion::physical_plan::DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + todo!() } } @@ -42,12 +90,63 @@ impl FFI_ExecutionPlan { }); Self { - private_data: Box::into_raw(private_data) as *mut c_void + properties: Some(properties_fn_wrapper), + children: Some(children_fn_wrapper), + private_data: Box::into_raw(private_data) as *mut c_void, + } + } + + pub fn empty() -> Self { + Self { + properties: None, + children: None, + private_data: std::ptr::null_mut(), } } } -impl ExecutionPlan for FFI_ExecutionPlan { +impl ExportedExecutionPlan { + pub fn new(plan: *const FFI_ExecutionPlan) -> Result { + let properties = unsafe { + let properties_fn = (*plan).properties.ok_or(DataFusionError::NotImplemented( + "properties not implemented on FFI_ExecutionPlan".to_string(), + ))?; + 
properties_fn(plan).into() + }; + + let children = unsafe { + let children_fn = (*plan).children.ok_or(DataFusionError::NotImplemented( + "children not implemented on FFI_ExecutionPlan".to_string(), + ))?; + let mut num_children = 0; + let mut children_ptr: *const FFI_ExecutionPlan = null(); + + if children_fn(plan, &mut num_children, &mut children_ptr) != 0 { + return Err(DataFusionError::Plan( + "Error getting children for FFI_ExecutionPlan".to_string(), + )); + } + + let ffi_vec = Vec::from_raw_parts(&mut children_ptr, num_children, num_children); + let maybe_children: Result> = ffi_vec + .into_iter() + .map(|child| { + ExportedExecutionPlan::new(child).map(|c| Arc::new(c) as Arc) + }) + .collect(); + + maybe_children? + }; + + Ok(Self { + plan, + properties, + children, + }) + } +} + +impl ExecutionPlan for ExportedExecutionPlan { fn name(&self) -> &str { todo!() } @@ -57,11 +156,11 @@ impl ExecutionPlan for FFI_ExecutionPlan { } fn properties(&self) -> &datafusion::physical_plan::PlanProperties { - self.properties + &self.properties } fn children(&self) -> Vec<&Arc> { - todo!() + self.children.iter().collect() } fn with_new_children( @@ -81,16 +180,15 @@ impl ExecutionPlan for FFI_ExecutionPlan { } impl DisplayAs for FFI_ExecutionPlan { - fn fmt_as(&self, t: datafusion::physical_plan::DisplayFormatType, f: &mut std::fmt::Formatter) -> std::fmt::Result { + fn fmt_as( + &self, + t: datafusion::physical_plan::DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { todo!() } } - - - - - #[repr(C)] #[derive(Debug)] #[allow(missing_docs)] @@ -104,4 +202,16 @@ pub struct FFI_PlanProperties { pub execution_mode: ExecutionMode, /// See [ExecutionPlanProperties::output_ordering] output_ordering: Option, -} \ No newline at end of file +} + +impl From<&PlanProperties> for FFI_PlanProperties { + fn from(value: &PlanProperties) -> Self { + todo!() + } +} + +impl From for PlanProperties { + fn from(value: FFI_PlanProperties) -> Self { + todo!() + } +} 
diff --git a/src/ffi/session_config.rs b/src/ffi/session_config.rs index ab81e0bc9..727bf4a77 100644 --- a/src/ffi/session_config.rs +++ b/src/ffi/session_config.rs @@ -1,4 +1,7 @@ -use std::{ffi::{c_void, CString}, sync::Arc}; +use std::{ + ffi::{c_void, CString}, + sync::Arc, +}; use datafusion::{catalog::Session, prelude::SessionConfig}; @@ -43,4 +46,4 @@ impl FFI_SessionConfig { private_data: Box::into_raw(private_data) as *mut c_void, } } -} \ No newline at end of file +} diff --git a/src/ffi/table_provider.rs b/src/ffi/table_provider.rs index 90e5f1084..fef4b9091 100644 --- a/src/ffi/table_provider.rs +++ b/src/ffi/table_provider.rs @@ -23,7 +23,7 @@ use datafusion::{ use tokio::runtime::Runtime; use super::{ - execution_plan::{ExecutionPlanPrivateData, FFI_ExecutionPlan}, + execution_plan::{ExecutionPlanPrivateData, ExportedExecutionPlan, FFI_ExecutionPlan}, session_config::{FFI_SessionConfig, SessionConfigPrivateData}, }; use datafusion::error::Result; @@ -150,14 +150,15 @@ impl ExportedTableProvider { let runtime = Runtime::new().unwrap(); let plan = runtime.block_on(provider.scan(session, projections, &filter_exprs, limit))?; - let plan_ptr = Box::new(ExecutionPlanPrivateData { - plan, - last_error: None, - }); + // let plan_ptr = Box::new(ExecutionPlanPrivateData { + // plan, + // last_error: None, + // }); - Ok(FFI_ExecutionPlan { - private_data: Box::into_raw(plan_ptr) as *mut c_void, - }) + // Ok(FFI_ExecutionPlan { + // private_data: Box::into_raw(plan_ptr) as *mut c_void, + // }) + Ok(FFI_ExecutionPlan::new(plan)) } } @@ -255,14 +256,17 @@ impl TableProvider for FFI_TableProvider { let session_config = FFI_SessionConfig::new(session); let n_projections = projection.map(|p| p.len()).unwrap_or(0) as c_int; - let projections: Vec = projection.map(|p| p.iter().map(|v| *v as c_int).collect()).unwrap_or_default(); + let projections: Vec = projection + .map(|p| p.iter().map(|v| *v as c_int).collect()) + .unwrap_or_default(); let projections_ptr = 
projections.as_ptr(); let n_filters = filters.len() as c_int; - let filters: Vec = filters.iter().filter_map(|f| CString::new(f.to_string()).ok()).collect(); - let filters_ptr: Vec<*const i8> = filters.iter() - .map(|s| s.as_ptr()) + let filters: Vec = filters + .iter() + .filter_map(|f| CString::new(f.to_string()).ok()) .collect(); + let filters_ptr: Vec<*const i8> = filters.iter().map(|s| s.as_ptr()).collect(); let limit = match limit { Some(l) => l as c_int, @@ -272,13 +276,26 @@ impl TableProvider for FFI_TableProvider { let mut out = FFI_ExecutionPlan::empty(); let err_code = unsafe { - scan_fn(self, &session_config, n_projections, projections_ptr, n_filters, filters_ptr.as_ptr(), limit, &mut out) + scan_fn( + self, + &session_config, + n_projections, + projections_ptr, + n_filters, + filters_ptr.as_ptr(), + limit, + &mut out, + ) }; - match err_code { - 0 => Ok(Arc::new(out)), - _ => Err(datafusion::error::DataFusionError::Internal("Unable to perform scan via FFI".to_string())) + if 0 != err_code { + return Err(datafusion::error::DataFusionError::Internal( + "Unable to perform scan via FFI".to_string(), + )); } + + let plan = ExportedExecutionPlan::new(&out)?; + Ok(Arc::new(plan)) } /// Tests whether the table provider can make use of a filter expression From 803e6ff505872908e600b5c240336228c9a19b1b Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 6 Oct 2024 08:35:02 -0400 Subject: [PATCH 06/28] Using datafusion-proto for execution plan properties --- src/ffi/execution_plan.rs | 125 +++++++++++++++++++++++++++----------- src/ffi/table_provider.rs | 41 ++++++------- 2 files changed, 108 insertions(+), 58 deletions(-) diff --git a/src/ffi/execution_plan.rs b/src/ffi/execution_plan.rs index 0603b64bf..4d8b37eea 100644 --- a/src/ffi/execution_plan.rs +++ b/src/ffi/execution_plan.rs @@ -7,23 +7,21 @@ use std::{ use datafusion::error::Result; use datafusion::{ error::DataFusionError, - parquet::file::properties, - physical_expr::{EquivalenceProperties, 
LexOrdering}, - physical_plan::{DisplayAs, ExecutionMode, ExecutionPlan, Partitioning, PlanProperties}, + physical_plan::{DisplayAs, ExecutionMode, ExecutionPlan, PlanProperties}, }; #[repr(C)] #[derive(Debug)] #[allow(missing_docs)] #[allow(non_camel_case_types)] -pub struct FFI_ExecutionPlan { +pub struct FFIExecutionPlan { pub properties: - Option FFI_PlanProperties>, + Option FFIPlanProperties>, pub children: Option< unsafe extern "C" fn( - plan: *const FFI_ExecutionPlan, + plan: *const FFIExecutionPlan, num_children: &mut usize, - out: &mut *const FFI_ExecutionPlan, + out: &mut *const FFIExecutionPlan, ) -> i32, >, @@ -35,24 +33,24 @@ pub struct ExecutionPlanPrivateData { pub last_error: Option, } -unsafe extern "C" fn properties_fn_wrapper(plan: *const FFI_ExecutionPlan) -> FFI_PlanProperties { +unsafe extern "C" fn properties_fn_wrapper(plan: *const FFIExecutionPlan) -> FFIPlanProperties { let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; let properties = (*private_data).plan.properties(); properties.into() } unsafe extern "C" fn children_fn_wrapper( - plan: *const FFI_ExecutionPlan, + plan: *const FFIExecutionPlan, num_children: &mut usize, - out: &mut *const FFI_ExecutionPlan, + out: &mut *const FFIExecutionPlan, ) -> i32 { let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; let children = (*private_data).plan.children(); *num_children = children.len(); - let children: Vec = children + let children: Vec = children .into_iter() - .map(|child| FFI_ExecutionPlan::new(child.clone())) + .map(|child| FFIExecutionPlan::new(child.clone())) .collect(); *out = children.as_ptr(); @@ -64,7 +62,7 @@ unsafe extern "C" fn children_fn_wrapper( // in the provider's side. 
#[derive(Debug)] pub struct ExportedExecutionPlan { - plan: *const FFI_ExecutionPlan, + plan: *const FFIExecutionPlan, properties: PlanProperties, children: Vec>, } @@ -75,14 +73,18 @@ unsafe impl Sync for ExportedExecutionPlan {} impl DisplayAs for ExportedExecutionPlan { fn fmt_as( &self, - t: datafusion::physical_plan::DisplayFormatType, + _t: datafusion::physical_plan::DisplayFormatType, f: &mut std::fmt::Formatter, ) -> std::fmt::Result { - todo!() + write!( + f, + "FFIExecutionPlan(number_of_children={})", + self.children.len(), + ) } } -impl FFI_ExecutionPlan { +impl FFIExecutionPlan { pub fn new(plan: Arc) -> Self { let private_data = Box::new(ExecutionPlanPrivateData { plan, @@ -106,24 +108,30 @@ impl FFI_ExecutionPlan { } impl ExportedExecutionPlan { - pub fn new(plan: *const FFI_ExecutionPlan) -> Result { + /// Wrap a FFI Execution Plan + /// + /// # Safety + /// + /// The caller must ensure the pointer provided points to a valid implementation + /// of FFIExecutionPlan + pub unsafe fn new(plan: *const FFIExecutionPlan) -> Result { let properties = unsafe { let properties_fn = (*plan).properties.ok_or(DataFusionError::NotImplemented( - "properties not implemented on FFI_ExecutionPlan".to_string(), + "properties not implemented on FFIExecutionPlan".to_string(), ))?; properties_fn(plan).into() }; let children = unsafe { let children_fn = (*plan).children.ok_or(DataFusionError::NotImplemented( - "children not implemented on FFI_ExecutionPlan".to_string(), + "children not implemented on FFIExecutionPlan".to_string(), ))?; let mut num_children = 0; - let mut children_ptr: *const FFI_ExecutionPlan = null(); + let mut children_ptr: *const FFIExecutionPlan = null(); if children_fn(plan, &mut num_children, &mut children_ptr) != 0 { return Err(DataFusionError::Plan( - "Error getting children for FFI_ExecutionPlan".to_string(), + "Error getting children for FFIExecutionPlan".to_string(), )); } @@ -179,7 +187,7 @@ impl ExecutionPlan for ExportedExecutionPlan { } } 
-impl DisplayAs for FFI_ExecutionPlan { +impl DisplayAs for FFIExecutionPlan { fn fmt_as( &self, t: datafusion::physical_plan::DisplayFormatType, @@ -192,26 +200,69 @@ impl DisplayAs for FFI_ExecutionPlan { #[repr(C)] #[derive(Debug)] #[allow(missing_docs)] -#[allow(non_camel_case_types)] -pub struct FFI_PlanProperties { - /// See [ExecutionPlanProperties::equivalence_properties] - pub eq_properties: EquivalenceProperties, - /// See [ExecutionPlanProperties::output_partitioning] - pub partitioning: Partitioning, - /// See [ExecutionPlanProperties::execution_mode] - pub execution_mode: ExecutionMode, - /// See [ExecutionPlanProperties::output_ordering] - output_ordering: Option, -} - -impl From<&PlanProperties> for FFI_PlanProperties { +pub struct FFIPlanProperties { + // We will build equivalence properties from teh schema and ordersing (new_with_orderings). This is how we do ti in dataset_exec + // pub eq_properties: Option EquivalenceProperties>, + + // Returns protobuf serialized bytes of the partitioning + pub output_partitioning: Option< + unsafe extern "C" fn( + plan: *const FFIPlanProperties, + buffer_size: &mut usize, + buffer_bytes: &mut *mut u8, + ) -> i32, + >, + + pub execution_mode: + Option FFIExecutionMode>, + + // PhysicalSortExprNodeCollection proto + pub output_ordering: Option< + unsafe extern "C" fn( + plan: *const FFIPlanProperties, + buffer_size: &mut usize, + buffer_bytes: &mut *mut u8, + ) -> i32, + >, +} + +impl From<&PlanProperties> for FFIPlanProperties { fn from(value: &PlanProperties) -> Self { todo!() } } -impl From for PlanProperties { - fn from(value: FFI_PlanProperties) -> Self { +impl From for PlanProperties { + fn from(value: FFIPlanProperties) -> Self { todo!() } } + +#[repr(C)] +pub enum FFIExecutionMode { + Bounded, + + Unbounded, + + PipelineBreaking, +} + +impl From for FFIExecutionMode { + fn from(value: ExecutionMode) -> Self { + match value { + ExecutionMode::Bounded => FFIExecutionMode::Bounded, + ExecutionMode::Unbounded 
=> FFIExecutionMode::Unbounded, + ExecutionMode::PipelineBreaking => FFIExecutionMode::PipelineBreaking, + } + } +} + +impl From for ExecutionMode { + fn from(value: FFIExecutionMode) -> Self { + match value { + FFIExecutionMode::Bounded => ExecutionMode::Bounded, + FFIExecutionMode::Unbounded => ExecutionMode::Unbounded, + FFIExecutionMode::PipelineBreaking => ExecutionMode::PipelineBreaking, + } + } +} diff --git a/src/ffi/table_provider.rs b/src/ffi/table_provider.rs index fef4b9091..492506afa 100644 --- a/src/ffi/table_provider.rs +++ b/src/ffi/table_provider.rs @@ -23,7 +23,7 @@ use datafusion::{ use tokio::runtime::Runtime; use super::{ - execution_plan::{ExecutionPlanPrivateData, ExportedExecutionPlan, FFI_ExecutionPlan}, + execution_plan::{ExecutionPlanPrivateData, ExportedExecutionPlan, FFIExecutionPlan}, session_config::{FFI_SessionConfig, SessionConfigPrivateData}, }; use datafusion::error::Result; @@ -44,7 +44,7 @@ pub struct FFI_TableProvider { n_filters: c_int, filters: *const *const c_char, limit: c_int, - out: *mut FFI_ExecutionPlan, + out: *mut FFIExecutionPlan, ) -> c_int, >, pub private_data: *mut c_void, @@ -73,7 +73,7 @@ unsafe extern "C" fn provider_scan( n_filters: c_int, filters: *const *const c_char, limit: c_int, - mut out: *mut FFI_ExecutionPlan, + mut out: *mut FFIExecutionPlan, ) -> c_int { let config = unsafe { (*session_config).private_data as *const SessionConfigPrivateData }; let session = SessionStateBuilder::new() @@ -135,7 +135,7 @@ impl ExportedTableProvider { projections: Option<&Vec>, filters: Vec, limit: Option, - ) -> Result { + ) -> Result { let private_data = self.get_private_data(); let provider = &private_data.provider; @@ -155,10 +155,10 @@ impl ExportedTableProvider { // last_error: None, // }); - // Ok(FFI_ExecutionPlan { + // Ok(FFIExecutionPlan { // private_data: Box::into_raw(plan_ptr) as *mut c_void, // }) - Ok(FFI_ExecutionPlan::new(plan)) + Ok(FFIExecutionPlan::new(plan)) } } @@ -231,7 +231,7 @@ impl 
TableProvider for FFI_TableProvider { /// Get the type of this table for metadata/catalog purposes. fn table_type(&self) -> TableType { - TableType::Base + todo!() } /// Create an ExecutionPlan that will scan the table. @@ -273,10 +273,10 @@ impl TableProvider for FFI_TableProvider { None => -1, }; - let mut out = FFI_ExecutionPlan::empty(); + let mut out = FFIExecutionPlan::empty(); - let err_code = unsafe { - scan_fn( + let plan = unsafe { + let err_code = scan_fn( self, &session_config, n_projections, @@ -285,16 +285,17 @@ impl TableProvider for FFI_TableProvider { filters_ptr.as_ptr(), limit, &mut out, - ) - }; + ); - if 0 != err_code { - return Err(datafusion::error::DataFusionError::Internal( - "Unable to perform scan via FFI".to_string(), - )); - } + if 0 != err_code { + return Err(datafusion::error::DataFusionError::Internal( + "Unable to perform scan via FFI".to_string(), + )); + } + + ExportedExecutionPlan::new(&out)? + }; - let plan = ExportedExecutionPlan::new(&out)?; Ok(Arc::new(plan)) } @@ -304,8 +305,6 @@ impl TableProvider for FFI_TableProvider { &self, filter: &[&Expr], ) -> Result> { - Err(datafusion::error::DataFusionError::NotImplemented( - "support filter pushdown not implemented".to_string(), - )) + todo!() } } From 9df2f866fc934bc5d6959604ff9210c9ae16c188 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 6 Oct 2024 09:26:46 -0400 Subject: [PATCH 07/28] Adding plan properties parsing from ffi --- src/ffi/execution_plan.rs | 100 +++++++++++++++++++++++++++++++++++--- 1 file changed, 94 insertions(+), 6 deletions(-) diff --git a/src/ffi/execution_plan.rs b/src/ffi/execution_plan.rs index 4d8b37eea..a5c411475 100644 --- a/src/ffi/execution_plan.rs +++ b/src/ffi/execution_plan.rs @@ -1,14 +1,24 @@ use std::{ ffi::{c_void, CString}, - ptr::null, + ptr::{null, null_mut}, + slice, sync::Arc, }; -use datafusion::error::Result; +use arrow::{datatypes::Schema, ffi::FFI_ArrowSchema}; use datafusion::{ error::DataFusionError, physical_plan::{DisplayAs, 
ExecutionMode, ExecutionPlan, PlanProperties}, }; +use datafusion::{error::Result, physical_expr::EquivalenceProperties, prelude::SessionContext}; +use datafusion_proto::{ + physical_plan::{ + from_proto::{parse_physical_sort_exprs, parse_protobuf_partitioning}, + DefaultPhysicalExtensionCodec, + }, + protobuf::{partitioning, Partitioning, PhysicalSortExprNodeCollection}, +}; +use prost::{DecodeError, Message}; #[repr(C)] #[derive(Debug)] @@ -119,7 +129,7 @@ impl ExportedExecutionPlan { let properties_fn = (*plan).properties.ok_or(DataFusionError::NotImplemented( "properties not implemented on FFIExecutionPlan".to_string(), ))?; - properties_fn(plan).into() + properties_fn(plan).try_into()? }; let children = unsafe { @@ -224,6 +234,8 @@ pub struct FFIPlanProperties { buffer_bytes: &mut *mut u8, ) -> i32, >, + + pub schema: Option FFI_ArrowSchema>, } impl From<&PlanProperties> for FFIPlanProperties { @@ -232,10 +244,86 @@ impl From<&PlanProperties> for FFIPlanProperties { } } -impl From for PlanProperties { - fn from(value: FFIPlanProperties) -> Self { - todo!() +impl TryFrom for PlanProperties { + type Error = DataFusionError; + + fn try_from(value: FFIPlanProperties) -> std::result::Result { + unsafe { + let schema_fn = value.schema.ok_or(DataFusionError::NotImplemented( + "schema() not implemented on FFIPlanProperties".to_string(), + ))?; + let ffi_schema = schema_fn(&value); + let schema: Schema = (&ffi_schema).try_into()?; + + let ordering_fn = value + .output_ordering + .ok_or(DataFusionError::NotImplemented( + "output_ordering() not implemented on FFIPlanProperties".to_string(), + ))?; + let mut buff_size = 0; + let mut buff = null_mut(); + if ordering_fn(&value, &mut buff_size, &mut buff) != 0 { + return Err(DataFusionError::Plan( + "Error occurred during FFI call to output_ordering in FFIPlanProperties" + .to_string(), + )); + } + let data = slice::from_raw_parts(buff, buff_size); + + let proto_output_ordering = PhysicalSortExprNodeCollection::decode(data) 
+ .map_err(|e| DataFusionError::External(Box::new(e)))?; + + // TODO we will need to get these, but unsure if it happesn on the provider or consumer right now. + let default_ctx = SessionContext::new(); + let codex = DefaultPhysicalExtensionCodec {}; + let orderings = parse_physical_sort_exprs( + &proto_output_ordering.physical_sort_expr_nodes, + &default_ctx, + &schema, + &codex, + )?; + + let partitioning_fn = + value + .output_partitioning + .ok_or(DataFusionError::NotImplemented( + "output_partitioning() not implemented on FFIPlanProperties".to_string(), + ))?; + if partitioning_fn(&value, &mut buff_size, &mut buff) != 0 { + return Err(DataFusionError::Plan( + "Error occurred during FFI call to output_partitioning in FFIPlanProperties" + .to_string(), + )); + } + let data = slice::from_raw_parts(buff, buff_size); + + let proto_partitioning = + Partitioning::decode(data).map_err(|e| DataFusionError::External(Box::new(e)))?; + // TODO: Validate this unwrap is safe. + let partitioning = parse_protobuf_partitioning( + Some(&proto_partitioning), + &default_ctx, + &schema, + &codex, + )? 
+ .unwrap(); + + let execution_mode_fn = value.execution_mode.ok_or(DataFusionError::NotImplemented( + "execution_mode() not implemented on FFIPlanProperties".to_string(), + ))?; + let execution_mode = execution_mode_fn(&value).into(); + + let eq_properties = + EquivalenceProperties::new_with_orderings(Arc::new(schema), &[orderings]); + + Ok(Self::new(eq_properties, partitioning, execution_mode)) + } } + // fn from(value: FFIPlanProperties) -> Self { + // let schema = self.schema() + + // let equiv_prop = EquivalenceProperties::new_with_orderings(schema, orderings); + // } } #[repr(C)] From 73a1899d07c1faa97139ed58ee167ebbdf44a113 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 6 Oct 2024 10:14:45 -0400 Subject: [PATCH 08/28] Standardize naming for FFI structs --- src/ffi/execution_plan.rs | 90 +++++++++++++++++++-------------------- src/ffi/table_provider.rs | 14 +++--- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/src/ffi/execution_plan.rs b/src/ffi/execution_plan.rs index a5c411475..6428ab0db 100644 --- a/src/ffi/execution_plan.rs +++ b/src/ffi/execution_plan.rs @@ -24,14 +24,14 @@ use prost::{DecodeError, Message}; #[derive(Debug)] #[allow(missing_docs)] #[allow(non_camel_case_types)] -pub struct FFIExecutionPlan { +pub struct FFI_ExecutionPlan { pub properties: - Option FFIPlanProperties>, + Option FFI_PlanProperties>, pub children: Option< unsafe extern "C" fn( - plan: *const FFIExecutionPlan, + plan: *const FFI_ExecutionPlan, num_children: &mut usize, - out: &mut *const FFIExecutionPlan, + out: &mut *const FFI_ExecutionPlan, ) -> i32, >, @@ -43,24 +43,24 @@ pub struct ExecutionPlanPrivateData { pub last_error: Option, } -unsafe extern "C" fn properties_fn_wrapper(plan: *const FFIExecutionPlan) -> FFIPlanProperties { +unsafe extern "C" fn properties_fn_wrapper(plan: *const FFI_ExecutionPlan) -> FFI_PlanProperties { let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; let properties = 
(*private_data).plan.properties(); properties.into() } unsafe extern "C" fn children_fn_wrapper( - plan: *const FFIExecutionPlan, + plan: *const FFI_ExecutionPlan, num_children: &mut usize, - out: &mut *const FFIExecutionPlan, + out: &mut *const FFI_ExecutionPlan, ) -> i32 { let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; let children = (*private_data).plan.children(); *num_children = children.len(); - let children: Vec = children + let children: Vec = children .into_iter() - .map(|child| FFIExecutionPlan::new(child.clone())) + .map(|child| FFI_ExecutionPlan::new(child.clone())) .collect(); *out = children.as_ptr(); @@ -72,7 +72,7 @@ unsafe extern "C" fn children_fn_wrapper( // in the provider's side. #[derive(Debug)] pub struct ExportedExecutionPlan { - plan: *const FFIExecutionPlan, + plan: *const FFI_ExecutionPlan, properties: PlanProperties, children: Vec>, } @@ -88,13 +88,13 @@ impl DisplayAs for ExportedExecutionPlan { ) -> std::fmt::Result { write!( f, - "FFIExecutionPlan(number_of_children={})", + "FFI_ExecutionPlan(number_of_children={})", self.children.len(), ) } } -impl FFIExecutionPlan { +impl FFI_ExecutionPlan { pub fn new(plan: Arc) -> Self { let private_data = Box::new(ExecutionPlanPrivateData { plan, @@ -123,25 +123,25 @@ impl ExportedExecutionPlan { /// # Safety /// /// The caller must ensure the pointer provided points to a valid implementation - /// of FFIExecutionPlan - pub unsafe fn new(plan: *const FFIExecutionPlan) -> Result { + /// of FFI_ExecutionPlan + pub unsafe fn new(plan: *const FFI_ExecutionPlan) -> Result { let properties = unsafe { let properties_fn = (*plan).properties.ok_or(DataFusionError::NotImplemented( - "properties not implemented on FFIExecutionPlan".to_string(), + "properties not implemented on FFI_ExecutionPlan".to_string(), ))?; properties_fn(plan).try_into()? 
}; let children = unsafe { let children_fn = (*plan).children.ok_or(DataFusionError::NotImplemented( - "children not implemented on FFIExecutionPlan".to_string(), + "children not implemented on FFI_ExecutionPlan".to_string(), ))?; let mut num_children = 0; - let mut children_ptr: *const FFIExecutionPlan = null(); + let mut children_ptr: *const FFI_ExecutionPlan = null(); if children_fn(plan, &mut num_children, &mut children_ptr) != 0 { return Err(DataFusionError::Plan( - "Error getting children for FFIExecutionPlan".to_string(), + "Error getting children for FFI_ExecutionPlan".to_string(), )); } @@ -197,7 +197,7 @@ impl ExecutionPlan for ExportedExecutionPlan { } } -impl DisplayAs for FFIExecutionPlan { +impl DisplayAs for FFI_ExecutionPlan { fn fmt_as( &self, t: datafusion::physical_plan::DisplayFormatType, @@ -210,47 +210,47 @@ impl DisplayAs for FFIExecutionPlan { #[repr(C)] #[derive(Debug)] #[allow(missing_docs)] -pub struct FFIPlanProperties { +pub struct FFI_PlanProperties { // We will build equivalence properties from teh schema and ordersing (new_with_orderings). 
This is how we do ti in dataset_exec - // pub eq_properties: Option EquivalenceProperties>, + // pub eq_properties: Option EquivalenceProperties>, // Returns protobuf serialized bytes of the partitioning pub output_partitioning: Option< unsafe extern "C" fn( - plan: *const FFIPlanProperties, + plan: *const FFI_PlanProperties, buffer_size: &mut usize, buffer_bytes: &mut *mut u8, ) -> i32, >, pub execution_mode: - Option FFIExecutionMode>, + Option FFI_ExecutionMode>, // PhysicalSortExprNodeCollection proto pub output_ordering: Option< unsafe extern "C" fn( - plan: *const FFIPlanProperties, + plan: *const FFI_PlanProperties, buffer_size: &mut usize, buffer_bytes: &mut *mut u8, ) -> i32, >, - pub schema: Option FFI_ArrowSchema>, + pub schema: Option FFI_ArrowSchema>, } -impl From<&PlanProperties> for FFIPlanProperties { +impl From<&PlanProperties> for FFI_PlanProperties { fn from(value: &PlanProperties) -> Self { todo!() } } -impl TryFrom for PlanProperties { +impl TryFrom for PlanProperties { type Error = DataFusionError; - fn try_from(value: FFIPlanProperties) -> std::result::Result { + fn try_from(value: FFI_PlanProperties) -> std::result::Result { unsafe { let schema_fn = value.schema.ok_or(DataFusionError::NotImplemented( - "schema() not implemented on FFIPlanProperties".to_string(), + "schema() not implemented on FFI_PlanProperties".to_string(), ))?; let ffi_schema = schema_fn(&value); let schema: Schema = (&ffi_schema).try_into()?; @@ -258,13 +258,13 @@ impl TryFrom for PlanProperties { let ordering_fn = value .output_ordering .ok_or(DataFusionError::NotImplemented( - "output_ordering() not implemented on FFIPlanProperties".to_string(), + "output_ordering() not implemented on FFI_PlanProperties".to_string(), ))?; let mut buff_size = 0; let mut buff = null_mut(); if ordering_fn(&value, &mut buff_size, &mut buff) != 0 { return Err(DataFusionError::Plan( - "Error occurred during FFI call to output_ordering in FFIPlanProperties" + "Error occurred during FFI call to 
output_ordering in FFI_PlanProperties" .to_string(), )); } @@ -287,11 +287,11 @@ impl TryFrom for PlanProperties { value .output_partitioning .ok_or(DataFusionError::NotImplemented( - "output_partitioning() not implemented on FFIPlanProperties".to_string(), + "output_partitioning() not implemented on FFI_PlanProperties".to_string(), ))?; if partitioning_fn(&value, &mut buff_size, &mut buff) != 0 { return Err(DataFusionError::Plan( - "Error occurred during FFI call to output_partitioning in FFIPlanProperties" + "Error occurred during FFI call to output_partitioning in FFI_PlanProperties" .to_string(), )); } @@ -309,7 +309,7 @@ impl TryFrom for PlanProperties { .unwrap(); let execution_mode_fn = value.execution_mode.ok_or(DataFusionError::NotImplemented( - "execution_mode() not implemented on FFIPlanProperties".to_string(), + "execution_mode() not implemented on FFI_PlanProperties".to_string(), ))?; let execution_mode = execution_mode_fn(&value).into(); @@ -319,7 +319,7 @@ impl TryFrom for PlanProperties { Ok(Self::new(eq_properties, partitioning, execution_mode)) } } - // fn from(value: FFIPlanProperties) -> Self { + // fn from(value: FFI_PlanProperties) -> Self { // let schema = self.schema() // let equiv_prop = EquivalenceProperties::new_with_orderings(schema, orderings); @@ -327,7 +327,7 @@ impl TryFrom for PlanProperties { } #[repr(C)] -pub enum FFIExecutionMode { +pub enum FFI_ExecutionMode { Bounded, Unbounded, @@ -335,22 +335,22 @@ pub enum FFIExecutionMode { PipelineBreaking, } -impl From for FFIExecutionMode { +impl From for FFI_ExecutionMode { fn from(value: ExecutionMode) -> Self { match value { - ExecutionMode::Bounded => FFIExecutionMode::Bounded, - ExecutionMode::Unbounded => FFIExecutionMode::Unbounded, - ExecutionMode::PipelineBreaking => FFIExecutionMode::PipelineBreaking, + ExecutionMode::Bounded => FFI_ExecutionMode::Bounded, + ExecutionMode::Unbounded => FFI_ExecutionMode::Unbounded, + ExecutionMode::PipelineBreaking => 
FFI_ExecutionMode::PipelineBreaking, } } } -impl From for ExecutionMode { - fn from(value: FFIExecutionMode) -> Self { +impl From for ExecutionMode { + fn from(value: FFI_ExecutionMode) -> Self { match value { - FFIExecutionMode::Bounded => ExecutionMode::Bounded, - FFIExecutionMode::Unbounded => ExecutionMode::Unbounded, - FFIExecutionMode::PipelineBreaking => ExecutionMode::PipelineBreaking, + FFI_ExecutionMode::Bounded => ExecutionMode::Bounded, + FFI_ExecutionMode::Unbounded => ExecutionMode::Unbounded, + FFI_ExecutionMode::PipelineBreaking => ExecutionMode::PipelineBreaking, } } } diff --git a/src/ffi/table_provider.rs b/src/ffi/table_provider.rs index 492506afa..a86f76b23 100644 --- a/src/ffi/table_provider.rs +++ b/src/ffi/table_provider.rs @@ -23,7 +23,7 @@ use datafusion::{ use tokio::runtime::Runtime; use super::{ - execution_plan::{ExecutionPlanPrivateData, ExportedExecutionPlan, FFIExecutionPlan}, + execution_plan::{ExecutionPlanPrivateData, ExportedExecutionPlan, FFI_ExecutionPlan}, session_config::{FFI_SessionConfig, SessionConfigPrivateData}, }; use datafusion::error::Result; @@ -44,7 +44,7 @@ pub struct FFI_TableProvider { n_filters: c_int, filters: *const *const c_char, limit: c_int, - out: *mut FFIExecutionPlan, + out: *mut FFI_ExecutionPlan, ) -> c_int, >, pub private_data: *mut c_void, @@ -73,7 +73,7 @@ unsafe extern "C" fn provider_scan( n_filters: c_int, filters: *const *const c_char, limit: c_int, - mut out: *mut FFIExecutionPlan, + mut out: *mut FFI_ExecutionPlan, ) -> c_int { let config = unsafe { (*session_config).private_data as *const SessionConfigPrivateData }; let session = SessionStateBuilder::new() @@ -135,7 +135,7 @@ impl ExportedTableProvider { projections: Option<&Vec>, filters: Vec, limit: Option, - ) -> Result { + ) -> Result { let private_data = self.get_private_data(); let provider = &private_data.provider; @@ -155,10 +155,10 @@ impl ExportedTableProvider { // last_error: None, // }); - // Ok(FFIExecutionPlan { + // 
Ok(FFI_ExecutionPlan { // private_data: Box::into_raw(plan_ptr) as *mut c_void, // }) - Ok(FFIExecutionPlan::new(plan)) + Ok(FFI_ExecutionPlan::new(plan)) } } @@ -273,7 +273,7 @@ impl TableProvider for FFI_TableProvider { None => -1, }; - let mut out = FFIExecutionPlan::empty(); + let mut out = FFI_ExecutionPlan::empty(); let plan = unsafe { let err_code = scan_fn( From 0d23d55f1a13745d14dce85560b9b70d63dcbad1 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Tue, 8 Oct 2024 05:13:04 -0400 Subject: [PATCH 09/28] Intermediate testing and troubleshooting --- src/context.rs | 2 +- src/ffi/execution_plan.rs | 268 ++++++++++++++++++++++++++++++++------ src/ffi/table_provider.rs | 58 +++++---- 3 files changed, 260 insertions(+), 68 deletions(-) diff --git a/src/context.rs b/src/context.rs index 2e9ac95c0..cc01b3217 100644 --- a/src/context.rs +++ b/src/context.rs @@ -586,7 +586,7 @@ impl PySessionContext { println!("Found provider version {}", provider.version); let schema = provider.schema(); - println!("Got schema through TableProvider trait {}", schema); + println!("Got schema through TableProvider trait."); let _ = self.ctx.register_table(name, Arc::new(provider))?; } diff --git a/src/ffi/execution_plan.rs b/src/ffi/execution_plan.rs index 6428ab0db..4da8ae958 100644 --- a/src/ffi/execution_plan.rs +++ b/src/ffi/execution_plan.rs @@ -1,5 +1,6 @@ use std::{ ffi::{c_void, CString}, + num, ptr::{null, null_mut}, slice, sync::Arc, @@ -14,6 +15,9 @@ use datafusion::{error::Result, physical_expr::EquivalenceProperties, prelude::S use datafusion_proto::{ physical_plan::{ from_proto::{parse_physical_sort_exprs, parse_protobuf_partitioning}, + to_proto::{ + serialize_partitioning, serialize_physical_exprs, serialize_physical_sort_exprs, + }, DefaultPhysicalExtensionCodec, }, protobuf::{partitioning, Partitioning, PhysicalSortExprNodeCollection}, @@ -31,9 +35,10 @@ pub struct FFI_ExecutionPlan { unsafe extern "C" fn( plan: *const FFI_ExecutionPlan, num_children: &mut usize, - 
out: &mut *const FFI_ExecutionPlan, - ) -> i32, + err_code: &mut i32, + ) -> *mut *const FFI_ExecutionPlan, >, + pub name: unsafe extern "C" fn(plan: *const FFI_ExecutionPlan) -> CString, pub private_data: *mut c_void, } @@ -41,30 +46,44 @@ pub struct FFI_ExecutionPlan { pub struct ExecutionPlanPrivateData { pub plan: Arc, pub last_error: Option, + pub children: Vec<*const FFI_ExecutionPlan>, } unsafe extern "C" fn properties_fn_wrapper(plan: *const FFI_ExecutionPlan) -> FFI_PlanProperties { let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; let properties = (*private_data).plan.properties(); - properties.into() + properties.clone().into() } unsafe extern "C" fn children_fn_wrapper( plan: *const FFI_ExecutionPlan, num_children: &mut usize, - out: &mut *const FFI_ExecutionPlan, -) -> i32 { + err_code: &mut i32, +) -> *mut *const FFI_ExecutionPlan { let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; - let children = (*private_data).plan.children(); - *num_children = children.len(); - let children: Vec = children - .into_iter() - .map(|child| FFI_ExecutionPlan::new(child.clone())) - .collect(); - *out = children.as_ptr(); + *num_children = (*private_data).children.len(); + // let children: Vec = children + // .into_iter() + // .map(|child| FFI_ExecutionPlan::new(child.clone())) + // .collect(); - 0 + *err_code = 0; + + let mut children: Vec<_> = (*private_data).children.to_owned(); + let children_ptr = children.as_mut_ptr(); + + std::mem::forget(children); + + children_ptr +} + +unsafe extern "C" fn name_fn_wrapper(plan: *const FFI_ExecutionPlan) -> CString { + let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; + + let name = (*private_data).plan.name(); + + CString::new(name).unwrap_or(CString::new("unable to parse execution plan name").unwrap()) } // Since the trait ExecutionPlan requires borrowed values, we wrap our FFI. 
@@ -72,6 +91,7 @@ unsafe extern "C" fn children_fn_wrapper( // in the provider's side. #[derive(Debug)] pub struct ExportedExecutionPlan { + name: String, plan: *const FFI_ExecutionPlan, properties: PlanProperties, children: Vec>, @@ -95,26 +115,38 @@ impl DisplayAs for ExportedExecutionPlan { } impl FFI_ExecutionPlan { + /// This function is called on the provider's side. pub fn new(plan: Arc) -> Self { + let children = plan + .children() + .into_iter() + .map(|child| Box::new(FFI_ExecutionPlan::new(child.clone()))) + .map(|child| Box::into_raw(child) as *const FFI_ExecutionPlan) + .collect(); + println!("children collected"); + let private_data = Box::new(ExecutionPlanPrivateData { plan, + children, last_error: None, }); + println!("generated private data, ready to return"); Self { properties: Some(properties_fn_wrapper), children: Some(children_fn_wrapper), + name: name_fn_wrapper, private_data: Box::into_raw(private_data) as *mut c_void, } } - pub fn empty() -> Self { - Self { - properties: None, - children: None, - private_data: std::ptr::null_mut(), - } - } + // pub fn empty() -> Self { + // Self { + // properties: None, + // children: None, + // private_data: std::ptr::null_mut(), + // } + // } } impl ExportedExecutionPlan { @@ -125,38 +157,66 @@ impl ExportedExecutionPlan { /// The caller must ensure the pointer provided points to a valid implementation /// of FFI_ExecutionPlan pub unsafe fn new(plan: *const FFI_ExecutionPlan) -> Result { + let name_fn = (*plan).name; + let name_cstr = name_fn(plan); + let name = name_cstr + .into_string() + .unwrap_or("Unable to parse FFI_ExecutionPlan name".to_string()); + + println!("entered ExportedExecutionPlan::new"); let properties = unsafe { let properties_fn = (*plan).properties.ok_or(DataFusionError::NotImplemented( "properties not implemented on FFI_ExecutionPlan".to_string(), ))?; + println!("About to call properties fn"); properties_fn(plan).try_into()? 
}; + println!("created properties"); let children = unsafe { let children_fn = (*plan).children.ok_or(DataFusionError::NotImplemented( "children not implemented on FFI_ExecutionPlan".to_string(), ))?; let mut num_children = 0; - let mut children_ptr: *const FFI_ExecutionPlan = null(); + let mut err_code = 0; + let mut children_ptr = children_fn(plan, &mut num_children, &mut err_code); + + println!( + "We called the FFI function children so the provider told us we have {} children", + num_children + ); - if children_fn(plan, &mut num_children, &mut children_ptr) != 0 { + if err_code != 0 { return Err(DataFusionError::Plan( "Error getting children for FFI_ExecutionPlan".to_string(), )); } - let ffi_vec = Vec::from_raw_parts(&mut children_ptr, num_children, num_children); + let ffi_vec = Vec::from_raw_parts(children_ptr, num_children, num_children); let maybe_children: Result> = ffi_vec .into_iter() .map(|child| { - ExportedExecutionPlan::new(child).map(|c| Arc::new(c) as Arc) + println!("Ok, we are about to examine a child ffi_executionplan"); + if let Some(props_fn) = (*child).properties { + println!("We do have properties on the child "); + let child_props = props_fn(child); + println!("Child schema {:?}", child_props.schema); + } + + let child_plan = ExportedExecutionPlan::new(child); + + child_plan.map(|c| Arc::new(c) as Arc) }) .collect(); + println!("finsihed maybe children"); maybe_children? 
}; + println!("About to return ExportedExecurtionPlan"); + Ok(Self { + name, plan, properties, children, @@ -166,11 +226,11 @@ impl ExportedExecutionPlan { impl ExecutionPlan for ExportedExecutionPlan { fn name(&self) -> &str { - todo!() + &self.name } fn as_any(&self) -> &dyn std::any::Any { - todo!() + self } fn properties(&self) -> &datafusion::physical_plan::PlanProperties { @@ -178,7 +238,10 @@ impl ExecutionPlan for ExportedExecutionPlan { } fn children(&self) -> Vec<&Arc> { - self.children.iter().collect() + self.children + .iter() + .map(|p| p as &Arc) + .collect() } fn with_new_children( @@ -210,6 +273,7 @@ impl DisplayAs for FFI_ExecutionPlan { #[repr(C)] #[derive(Debug)] #[allow(missing_docs)] +#[allow(non_camel_case_types)] pub struct FFI_PlanProperties { // We will build equivalence properties from teh schema and ordersing (new_with_orderings). This is how we do ti in dataset_exec // pub eq_properties: Option EquivalenceProperties>, @@ -236,14 +300,123 @@ pub struct FFI_PlanProperties { >, pub schema: Option FFI_ArrowSchema>, + + pub private_data: *mut c_void, } -impl From<&PlanProperties> for FFI_PlanProperties { - fn from(value: &PlanProperties) -> Self { - todo!() +unsafe extern "C" fn output_partitioning_fn_wrapper( + properties: *const FFI_PlanProperties, + buffer_size: &mut usize, + buffer_bytes: &mut *mut u8, +) -> i32 { + // let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; + // let properties = (*private_data).plan.properties(); + // properties.clone().into() + let private_data = (*properties).private_data as *const PlanProperties; + let partitioning = (*private_data).output_partitioning(); + + let codec = DefaultPhysicalExtensionCodec {}; + let partitioning_data = match serialize_partitioning(partitioning, &codec) { + Ok(p) => p, + Err(_) => return 1, + }; + + let mut partition_bytes = partitioning_data.encode_to_vec(); + *buffer_size = partition_bytes.len(); + *buffer_bytes = partition_bytes.as_mut_ptr(); + + 
std::mem::forget(partition_bytes); + + 0 +} + +unsafe extern "C" fn execution_mode_fn_wrapper( + properties: *const FFI_PlanProperties, +) -> FFI_ExecutionMode { + // let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; + // let properties = (*private_data).plan.properties(); + // properties.clone().into() + let private_data = (*properties).private_data as *const PlanProperties; + let execution_mode = (*private_data).execution_mode(); + + execution_mode.into() +} + +unsafe extern "C" fn output_ordering_fn_wrapper( + properties: *const FFI_PlanProperties, + buffer_size: &mut usize, + buffer_bytes: &mut *mut u8, +) -> i32 { + // let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; + // let properties = (*private_data).plan.properties(); + // properties.clone().into() + let private_data = (*properties).private_data as *const PlanProperties; + let output_ordering = match (*private_data).output_ordering() { + Some(o) => o, + None => { + *buffer_size = 0; + return 0; + } + } + .to_owned(); + + let codec = DefaultPhysicalExtensionCodec {}; + let physical_sort_expr_nodes = match serialize_physical_sort_exprs(output_ordering, &codec) { + Ok(p) => p, + Err(_) => return 1, + }; + + let ordering_data = PhysicalSortExprNodeCollection { + physical_sort_expr_nodes, + }; + + let mut ordering_bytes = ordering_data.encode_to_vec(); + *buffer_size = ordering_bytes.len(); + *buffer_bytes = ordering_bytes.as_mut_ptr(); + std::mem::forget(ordering_bytes); + + 0 +} + +// pub schema: Option FFI_ArrowSchema>, +unsafe extern "C" fn schema_fn_wrapper(properties: *const FFI_PlanProperties) -> FFI_ArrowSchema { + let private_data = (*properties).private_data as *const PlanProperties; + let schema = (*private_data).eq_properties.schema(); + + // This does silently fail because TableProvider does not return a result + // so we expect it to always pass. Maybe some logging should be added. 
+ FFI_ArrowSchema::try_from(schema.as_ref()).unwrap_or(FFI_ArrowSchema::empty()) +} + +impl From for FFI_PlanProperties { + fn from(value: PlanProperties) -> Self { + let private_data = Box::new(value); + + Self { + output_partitioning: Some(output_partitioning_fn_wrapper), + execution_mode: Some(execution_mode_fn_wrapper), + output_ordering: Some(output_ordering_fn_wrapper), + schema: Some(schema_fn_wrapper), + private_data: Box::into_raw(private_data) as *mut c_void, + } } } +// /// Creates a new [`FFI_TableProvider`]. +// pub fn new(provider: Box) -> Self { +// let private_data = Box::new(ProviderPrivateData { +// provider, +// last_error: None, +// }); + +// Self { +// version: 2, +// schema: Some(provider_schema), +// scan: Some(provider_scan), +// private_data: Box::into_raw(private_data) as *mut c_void, +// } +// } + impl TryFrom for PlanProperties { type Error = DataFusionError; @@ -268,20 +441,27 @@ impl TryFrom for PlanProperties { .to_string(), )); } - let data = slice::from_raw_parts(buff, buff_size); - - let proto_output_ordering = PhysicalSortExprNodeCollection::decode(data) - .map_err(|e| DataFusionError::External(Box::new(e)))?; // TODO we will need to get these, but unsure if it happesn on the provider or consumer right now. let default_ctx = SessionContext::new(); let codex = DefaultPhysicalExtensionCodec {}; - let orderings = parse_physical_sort_exprs( - &proto_output_ordering.physical_sort_expr_nodes, - &default_ctx, - &schema, - &codex, - )?; + + let orderings = match buff_size == 0 { + true => None, + false => { + let data = slice::from_raw_parts(buff, buff_size); + + let proto_output_ordering = PhysicalSortExprNodeCollection::decode(data) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + + Some(parse_physical_sort_exprs( + &proto_output_ordering.physical_sort_expr_nodes, + &default_ctx, + &schema, + &codex, + )?) 
+ } + }; let partitioning_fn = value @@ -313,8 +493,12 @@ impl TryFrom for PlanProperties { ))?; let execution_mode = execution_mode_fn(&value).into(); - let eq_properties = - EquivalenceProperties::new_with_orderings(Arc::new(schema), &[orderings]); + let eq_properties = match orderings { + Some(ordering) => { + EquivalenceProperties::new_with_orderings(Arc::new(schema), &[ordering]) + } + None => EquivalenceProperties::new(Arc::new(schema)), + }; Ok(Self::new(eq_properties, partitioning, execution_mode)) } diff --git a/src/ffi/table_provider.rs b/src/ffi/table_provider.rs index a86f76b23..d667adb2a 100644 --- a/src/ffi/table_provider.rs +++ b/src/ffi/table_provider.rs @@ -1,6 +1,7 @@ use std::{ any::Any, ffi::{c_char, c_int, c_void, CStr, CString}, + ptr::null_mut, sync::Arc, }; @@ -44,8 +45,8 @@ pub struct FFI_TableProvider { n_filters: c_int, filters: *const *const c_char, limit: c_int, - out: *mut FFI_ExecutionPlan, - ) -> c_int, + err_code: *mut c_int, + ) -> *mut FFI_ExecutionPlan, >, pub private_data: *mut c_void, } @@ -65,7 +66,7 @@ unsafe extern "C" fn provider_schema(provider: *const FFI_TableProvider) -> FFI_ ExportedTableProvider(provider).provider_schema() } -unsafe extern "C" fn provider_scan( +unsafe extern "C" fn scan_fn_wrapper( provider: *const FFI_TableProvider, session_config: *const FFI_SessionConfig, n_projections: c_int, @@ -73,8 +74,9 @@ unsafe extern "C" fn provider_scan( n_filters: c_int, filters: *const *const c_char, limit: c_int, - mut out: *mut FFI_ExecutionPlan, -) -> c_int { + err_code: *mut c_int, +) -> *mut FFI_ExecutionPlan { + println!("entered scan_fn_wrapper"); let config = unsafe { (*session_config).private_data as *const SessionConfigPrivateData }; let session = SessionStateBuilder::new() .with_config((*config).config.clone()) @@ -106,12 +108,17 @@ unsafe extern "C" fn provider_scan( limit, ); + println!("leaving scan_fn_wrapper, has plan? 
{}", plan.is_ok()); + match plan { - Ok(mut plan) => { - out = &mut plan; - 0 + Ok(plan) => { + *err_code = 0; + plan + } + Err(_) => { + *err_code = 1; + null_mut() } - Err(_) => 1, } } @@ -135,7 +142,7 @@ impl ExportedTableProvider { projections: Option<&Vec>, filters: Vec, limit: Option, - ) -> Result { + ) -> Result<*mut FFI_ExecutionPlan> { let private_data = self.get_private_data(); let provider = &private_data.provider; @@ -150,15 +157,8 @@ impl ExportedTableProvider { let runtime = Runtime::new().unwrap(); let plan = runtime.block_on(provider.scan(session, projections, &filter_exprs, limit))?; - // let plan_ptr = Box::new(ExecutionPlanPrivateData { - // plan, - // last_error: None, - // }); - - // Ok(FFI_ExecutionPlan { - // private_data: Box::into_raw(plan_ptr) as *mut c_void, - // }) - Ok(FFI_ExecutionPlan::new(plan)) + let plan_boxed = Box::new(FFI_ExecutionPlan::new(plan)); + Ok(Box::into_raw(plan_boxed)) } } @@ -187,7 +187,7 @@ impl FFI_TableProvider { Self { version: 2, schema: Some(provider_schema), - scan: Some(provider_scan), + scan: Some(scan_fn_wrapper), private_data: Box::into_raw(private_data) as *mut c_void, } } @@ -273,10 +273,10 @@ impl TableProvider for FFI_TableProvider { None => -1, }; - let mut out = FFI_ExecutionPlan::empty(); - + println!("Within scan about to call unsafe scan_fn"); + let mut err_code = 0; let plan = unsafe { - let err_code = scan_fn( + let plan_ptr = scan_fn( self, &session_config, n_projections, @@ -284,7 +284,7 @@ impl TableProvider for FFI_TableProvider { n_filters, filters_ptr.as_ptr(), limit, - &mut out, + &mut err_code, ); if 0 != err_code { @@ -293,8 +293,16 @@ impl TableProvider for FFI_TableProvider { )); } - ExportedExecutionPlan::new(&out)? 
+ println!( + "Finished scan_fn inside FFI_TableProvider::scan {}", + plan_ptr.is_null() + ); + + let p = ExportedExecutionPlan::new(plan_ptr)?; + println!("ExportedExecutionPlan::new returned inside scan()"); + p }; + println!("Scan returned with some plan."); Ok(Arc::new(plan)) } From 7d42f7389cb7331780adc4bad3cb9124f32c7cd9 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 9 Oct 2024 10:54:22 -0400 Subject: [PATCH 10/28] Adding record batch stream ffi representation --- src/ffi/execution_plan.rs | 79 +++++-- src/ffi/mod.rs | 1 + src/ffi/record_batch_stream.rs | 416 +++++++++++++++++++++++++++++++++ src/ffi/table_provider.rs | 24 +- 4 files changed, 486 insertions(+), 34 deletions(-) create mode 100644 src/ffi/record_batch_stream.rs diff --git a/src/ffi/execution_plan.rs b/src/ffi/execution_plan.rs index 4da8ae958..02944ff2b 100644 --- a/src/ffi/execution_plan.rs +++ b/src/ffi/execution_plan.rs @@ -1,28 +1,35 @@ use std::{ - ffi::{c_void, CString}, - num, + ffi::{c_char, c_void, CString}, ptr::{null, null_mut}, slice, sync::Arc, }; -use arrow::{datatypes::Schema, ffi::FFI_ArrowSchema}; +use arrow::{ + array::RecordBatchReader, + datatypes::Schema, + ffi::{FFI_ArrowArray, FFI_ArrowSchema}, + ffi_stream::FFI_ArrowArrayStream, +}; use datafusion::{ error::DataFusionError, + execution::TaskContext, physical_plan::{DisplayAs, ExecutionMode, ExecutionPlan, PlanProperties}, }; use datafusion::{error::Result, physical_expr::EquivalenceProperties, prelude::SessionContext}; use datafusion_proto::{ physical_plan::{ from_proto::{parse_physical_sort_exprs, parse_protobuf_partitioning}, - to_proto::{ - serialize_partitioning, serialize_physical_exprs, serialize_physical_sort_exprs, - }, + to_proto::{serialize_partitioning, serialize_physical_sort_exprs}, DefaultPhysicalExtensionCodec, }, - protobuf::{partitioning, Partitioning, PhysicalSortExprNodeCollection}, + protobuf::{Partitioning, PhysicalSortExprNodeCollection}, }; -use prost::{DecodeError, Message}; +use 
futures::{StreamExt, TryStreamExt}; +use prost::Message; +use tokio::runtime::Runtime; + +use super::record_batch_stream::record_batch_to_arrow_stream; #[repr(C)] #[derive(Debug)] @@ -38,7 +45,13 @@ pub struct FFI_ExecutionPlan { err_code: &mut i32, ) -> *mut *const FFI_ExecutionPlan, >, - pub name: unsafe extern "C" fn(plan: *const FFI_ExecutionPlan) -> CString, + pub name: unsafe extern "C" fn(plan: *const FFI_ExecutionPlan) -> *const c_char, + + pub execute: unsafe extern "C" fn( + plan: *const FFI_ExecutionPlan, + partition: usize, + err_code: &mut i32, + ) -> *const FFI_ArrowArrayStream, pub private_data: *mut c_void, } @@ -47,6 +60,7 @@ pub struct ExecutionPlanPrivateData { pub plan: Arc, pub last_error: Option, pub children: Vec<*const FFI_ExecutionPlan>, + pub context: Arc, } unsafe extern "C" fn properties_fn_wrapper(plan: *const FFI_ExecutionPlan) -> FFI_PlanProperties { @@ -63,11 +77,6 @@ unsafe extern "C" fn children_fn_wrapper( let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; *num_children = (*private_data).children.len(); - // let children: Vec = children - // .into_iter() - // .map(|child| FFI_ExecutionPlan::new(child.clone())) - // .collect(); - *err_code = 0; let mut children: Vec<_> = (*private_data).children.to_owned(); @@ -78,12 +87,36 @@ unsafe extern "C" fn children_fn_wrapper( children_ptr } -unsafe extern "C" fn name_fn_wrapper(plan: *const FFI_ExecutionPlan) -> CString { +unsafe extern "C" fn execute_fn_wrapper( + plan: *const FFI_ExecutionPlan, + partition: usize, + err_code: &mut i32, +) -> *const FFI_ArrowArrayStream { + let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; + + let mut record_batch_stream = match (*private_data) + .plan + .execute(partition, (*private_data).context.clone()) + { + Ok(rbs) => rbs, + Err(_e) => { + *err_code = 1; + return null(); + } + }; + + let ffi_stream = Box::new(record_batch_to_arrow_stream(record_batch_stream)); + + Box::into_raw(ffi_stream) +} +unsafe 
extern "C" fn name_fn_wrapper(plan: *const FFI_ExecutionPlan) -> *const c_char { let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; let name = (*private_data).plan.name(); - CString::new(name).unwrap_or(CString::new("unable to parse execution plan name").unwrap()) + CString::new(name) + .unwrap_or(CString::new("unable to parse execution plan name").unwrap()) + .into_raw() } // Since the trait ExecutionPlan requires borrowed values, we wrap our FFI. @@ -116,11 +149,11 @@ impl DisplayAs for ExportedExecutionPlan { impl FFI_ExecutionPlan { /// This function is called on the provider's side. - pub fn new(plan: Arc) -> Self { + pub fn new(plan: Arc, context: Arc) -> Self { let children = plan .children() .into_iter() - .map(|child| Box::new(FFI_ExecutionPlan::new(child.clone()))) + .map(|child| Box::new(FFI_ExecutionPlan::new(child.clone(), context.clone()))) .map(|child| Box::into_raw(child) as *const FFI_ExecutionPlan) .collect(); println!("children collected"); @@ -128,6 +161,7 @@ impl FFI_ExecutionPlan { let private_data = Box::new(ExecutionPlanPrivateData { plan, children, + context, last_error: None, }); println!("generated private data, ready to return"); @@ -136,6 +170,7 @@ impl FFI_ExecutionPlan { properties: Some(properties_fn_wrapper), children: Some(children_fn_wrapper), name: name_fn_wrapper, + execute: execute_fn_wrapper, private_data: Box::into_raw(private_data) as *mut c_void, } } @@ -159,9 +194,10 @@ impl ExportedExecutionPlan { pub unsafe fn new(plan: *const FFI_ExecutionPlan) -> Result { let name_fn = (*plan).name; let name_cstr = name_fn(plan); - let name = name_cstr - .into_string() - .unwrap_or("Unable to parse FFI_ExecutionPlan name".to_string()); + let name = CString::from_raw(name_cstr as *mut c_char) + .to_str() + .unwrap_or("Unable to parse FFI_ExecutionPlan name") + .to_string(); println!("entered ExportedExecutionPlan::new"); let properties = unsafe { @@ -511,6 +547,7 @@ impl TryFrom for PlanProperties { } #[repr(C)] 
+#[allow(non_camel_case_types)] pub enum FFI_ExecutionMode { Bounded, diff --git a/src/ffi/mod.rs b/src/ffi/mod.rs index 57bfc97eb..c28f95fb3 100644 --- a/src/ffi/mod.rs +++ b/src/ffi/mod.rs @@ -1,4 +1,5 @@ pub mod execution_plan; +pub mod record_batch_stream; pub mod session_config; pub mod table_provider; diff --git a/src/ffi/record_batch_stream.rs b/src/ffi/record_batch_stream.rs new file mode 100644 index 000000000..8fb6cb159 --- /dev/null +++ b/src/ffi/record_batch_stream.rs @@ -0,0 +1,416 @@ +use std::{ + ffi::{c_char, c_int, c_void, CString}, + ptr::addr_of, +}; + +use arrow::array::Array; +use arrow::{ + array::StructArray, + ffi::{FFI_ArrowArray, FFI_ArrowSchema}, + ffi_stream::FFI_ArrowArrayStream, +}; +use datafusion::execution::SendableRecordBatchStream; +use futures::{executor::block_on, TryStreamExt}; + +pub fn record_batch_to_arrow_stream(stream: SendableRecordBatchStream) -> FFI_ArrowArrayStream { + let private_data = Box::new(RecoredBatchStreamPrivateData { + stream, + last_error: None, + }); + + FFI_ArrowArrayStream { + get_schema: Some(get_schema), + get_next: Some(get_next), + get_last_error: Some(get_last_error), + release: Some(release_stream), + private_data: Box::into_raw(private_data) as *mut c_void, + } +} + +struct RecoredBatchStreamPrivateData { + stream: SendableRecordBatchStream, + last_error: Option, +} + +const ENOMEM: i32 = 12; +const EIO: i32 = 5; +const EINVAL: i32 = 22; +const ENOSYS: i32 = 78; + +// callback used to drop [FFI_ArrowArrayStream] when it is exported. 
+unsafe extern "C" fn release_stream(stream: *mut FFI_ArrowArrayStream) { + if stream.is_null() { + return; + } + let stream = &mut *stream; + + stream.get_schema = None; + stream.get_next = None; + stream.get_last_error = None; + + let private_data = Box::from_raw(stream.private_data as *mut RecoredBatchStreamPrivateData); + drop(private_data); + + stream.release = None; +} + +// The callback used to get array schema +unsafe extern "C" fn get_schema( + stream: *mut FFI_ArrowArrayStream, + schema: *mut FFI_ArrowSchema, +) -> c_int { + ExportedRecordBatchStream { stream }.get_schema(schema) +} + +// The callback used to get next array +unsafe extern "C" fn get_next( + stream: *mut FFI_ArrowArrayStream, + array: *mut FFI_ArrowArray, +) -> c_int { + ExportedRecordBatchStream { stream }.get_next(array) +} + +// The callback used to get the error from last operation on the `FFI_ArrowArrayStream` +unsafe extern "C" fn get_last_error(stream: *mut FFI_ArrowArrayStream) -> *const c_char { + let mut ffi_stream = ExportedRecordBatchStream { stream }; + // The consumer should not take ownership of this string, we should return + // a const pointer to it. 
+ match ffi_stream.get_last_error() { + Some(err_string) => err_string.as_ptr(), + None => std::ptr::null(), + } +} + +struct ExportedRecordBatchStream { + stream: *mut FFI_ArrowArrayStream, +} + +impl ExportedRecordBatchStream { + fn get_private_data(&mut self) -> &mut RecoredBatchStreamPrivateData { + unsafe { &mut *((*self.stream).private_data as *mut RecoredBatchStreamPrivateData) } + } + + pub fn get_schema(&mut self, out: *mut FFI_ArrowSchema) -> i32 { + let private_data = self.get_private_data(); + let stream = &private_data.stream; + + let schema = FFI_ArrowSchema::try_from(stream.schema().as_ref()); + + match schema { + Ok(schema) => { + unsafe { std::ptr::copy(addr_of!(schema), out, 1) }; + std::mem::forget(schema); + 0 + } + Err(ref err) => { + private_data.last_error = Some( + CString::new(err.to_string()).expect("Error string has a null byte in it."), + ); + 1 + } + } + } + + pub fn get_next(&mut self, out: *mut FFI_ArrowArray) -> i32 { + let private_data = self.get_private_data(); + + let maybe_batch = block_on(private_data.stream.try_next()); + + match maybe_batch { + Ok(None) => { + // Marks ArrowArray released to indicate reaching the end of stream. + unsafe { std::ptr::write(out, FFI_ArrowArray::empty()) } + 0 + } + Ok(Some(batch)) => { + let struct_array = StructArray::from(batch); + let array = FFI_ArrowArray::new(&struct_array.to_data()); + + unsafe { std::ptr::write_unaligned(out, array) }; + 0 + } + Err(err) => { + private_data.last_error = Some( + CString::new(err.to_string()).expect("Error string has a null byte in it."), + ); + 1 + } + } + } + + pub fn get_last_error(&mut self) -> Option<&CString> { + self.get_private_data().last_error.as_ref() + } +} + +// /// A `RecordBatchReader` which imports Arrays from `FFI_ArrowArrayStream`. +// /// Struct used to fetch `RecordBatch` from the C Stream Interface. +// /// Its main responsibility is to expose `RecordBatchReader` functionality +// /// that requires [FFI_ArrowArrayStream]. 
+// #[derive(Debug)] +// pub struct ArrowArrayStreamReader { +// stream: FFI_ArrowArrayStream, +// schema: SchemaRef, +// } + +// /// Gets schema from a raw pointer of `FFI_ArrowArrayStream`. This is used when constructing +// /// `ArrowArrayStreamReader` to cache schema. +// fn get_stream_schema(stream_ptr: *mut FFI_ArrowArrayStream) -> Result { +// let mut schema = FFI_ArrowSchema::empty(); + +// let ret_code = unsafe { (*stream_ptr).get_schema.unwrap()(stream_ptr, &mut schema) }; + +// if ret_code == 0 { +// let schema = Schema::try_from(&schema)?; +// Ok(Arc::new(schema)) +// } else { +// Err(ArrowError::CDataInterface(format!( +// "Cannot get schema from input stream. Error code: {ret_code:?}" +// ))) +// } +// } + +// impl ArrowArrayStreamReader { +// /// Creates a new `ArrowArrayStreamReader` from a `FFI_ArrowArrayStream`. +// /// This is used to import from the C Stream Interface. +// #[allow(dead_code)] +// pub fn try_new(mut stream: FFI_ArrowArrayStream) -> Result { +// if stream.release.is_none() { +// return Err(ArrowError::CDataInterface( +// "input stream is already released".to_string(), +// )); +// } + +// let schema = get_stream_schema(&mut stream)?; + +// Ok(Self { stream, schema }) +// } + +// /// Creates a new `ArrowArrayStreamReader` from a raw pointer of `FFI_ArrowArrayStream`. +// /// +// /// Assumes that the pointer represents valid C Stream Interfaces. +// /// This function copies the content from the raw pointer and cleans up it to prevent +// /// double-dropping. The caller is responsible for freeing up the memory allocated for +// /// the pointer. 
+// /// +// /// # Safety +// /// +// /// See [`FFI_ArrowArrayStream::from_raw`] +// pub unsafe fn from_raw(raw_stream: *mut FFI_ArrowArrayStream) -> Result { +// Self::try_new(FFI_ArrowArrayStream::from_raw(raw_stream)) +// } + +// /// Get the last error from `ArrowArrayStreamReader` +// fn get_stream_last_error(&mut self) -> Option { +// let get_last_error = self.stream.get_last_error?; + +// let error_str = unsafe { get_last_error(&mut self.stream) }; +// if error_str.is_null() { +// return None; +// } + +// let error_str = unsafe { CStr::from_ptr(error_str) }; +// Some(error_str.to_string_lossy().to_string()) +// } +// } + +// impl Iterator for ArrowArrayStreamReader { +// type Item = Result; + +// fn next(&mut self) -> Option { +// let mut array = FFI_ArrowArray::empty(); + +// let ret_code = unsafe { self.stream.get_next.unwrap()(&mut self.stream, &mut array) }; + +// if ret_code == 0 { +// // The end of stream has been reached +// if array.is_released() { +// return None; +// } + +// let result = unsafe { +// from_ffi_and_data_type(array, DataType::Struct(self.schema().fields().clone())) +// }; +// Some(result.map(|data| RecordBatch::from(StructArray::from(data)))) +// } else { +// let last_error = self.get_stream_last_error(); +// let err = ArrowError::CDataInterface(last_error.unwrap()); +// Some(Err(err)) +// } +// } +// } + +// impl RecordBatchReader for ArrowArrayStreamReader { +// fn schema(&self) -> SchemaRef { +// self.schema.clone() +// } +// } + +// /// Exports a record batch reader to raw pointer of the C Stream Interface provided by the consumer. +// /// +// /// # Safety +// /// Assumes that the pointer represents valid C Stream Interfaces, both in memory +// /// representation and lifetime via the `release` mechanism. 
+// #[deprecated(note = "Use FFI_ArrowArrayStream::new")] +// pub unsafe fn export_reader_into_raw( +// reader: Box, +// out_stream: *mut FFI_ArrowArrayStream, +// ) { +// let stream = FFI_ArrowArrayStream::new(reader); + +// std::ptr::write_unaligned(out_stream, stream); +// } + +// #[cfg(test)] +// mod tests { +// use super::*; + +// use arrow_schema::Field; + +// use crate::array::Int32Array; +// use crate::ffi::from_ffi; + +// struct TestRecordBatchReader { +// schema: SchemaRef, +// iter: Box> + Send>, +// } + +// impl TestRecordBatchReader { +// pub fn new( +// schema: SchemaRef, +// iter: Box> + Send>, +// ) -> Box { +// Box::new(TestRecordBatchReader { schema, iter }) +// } +// } + +// impl Iterator for TestRecordBatchReader { +// type Item = Result; + +// fn next(&mut self) -> Option { +// self.iter.next() +// } +// } + +// impl RecordBatchReader for TestRecordBatchReader { +// fn schema(&self) -> SchemaRef { +// self.schema.clone() +// } +// } + +// fn _test_round_trip_export(arrays: Vec>) -> Result<()> { +// let schema = Arc::new(Schema::new(vec![ +// Field::new("a", arrays[0].data_type().clone(), true), +// Field::new("b", arrays[1].data_type().clone(), true), +// Field::new("c", arrays[2].data_type().clone(), true), +// ])); +// let batch = RecordBatch::try_new(schema.clone(), arrays).unwrap(); +// let iter = Box::new(vec![batch.clone(), batch.clone()].into_iter().map(Ok)) as _; + +// let reader = TestRecordBatchReader::new(schema.clone(), iter); + +// // Export a `RecordBatchReader` through `FFI_ArrowArrayStream` +// let mut ffi_stream = FFI_ArrowArrayStream::new(reader); + +// // Get schema from `FFI_ArrowArrayStream` +// let mut ffi_schema = FFI_ArrowSchema::empty(); +// let ret_code = unsafe { get_schema(&mut ffi_stream, &mut ffi_schema) }; +// assert_eq!(ret_code, 0); + +// let exported_schema = Schema::try_from(&ffi_schema).unwrap(); +// assert_eq!(&exported_schema, schema.as_ref()); + +// // Get array from `FFI_ArrowArrayStream` +// let mut 
produced_batches = vec![]; +// loop { +// let mut ffi_array = FFI_ArrowArray::empty(); +// let ret_code = unsafe { get_next(&mut ffi_stream, &mut ffi_array) }; +// assert_eq!(ret_code, 0); + +// // The end of stream has been reached +// if ffi_array.is_released() { +// break; +// } + +// let array = unsafe { from_ffi(ffi_array, &ffi_schema) }.unwrap(); + +// let record_batch = RecordBatch::from(StructArray::from(array)); +// produced_batches.push(record_batch); +// } + +// assert_eq!(produced_batches, vec![batch.clone(), batch]); + +// Ok(()) +// } + +// fn _test_round_trip_import(arrays: Vec>) -> Result<()> { +// let schema = Arc::new(Schema::new(vec![ +// Field::new("a", arrays[0].data_type().clone(), true), +// Field::new("b", arrays[1].data_type().clone(), true), +// Field::new("c", arrays[2].data_type().clone(), true), +// ])); +// let batch = RecordBatch::try_new(schema.clone(), arrays).unwrap(); +// let iter = Box::new(vec![batch.clone(), batch.clone()].into_iter().map(Ok)) as _; + +// let reader = TestRecordBatchReader::new(schema.clone(), iter); + +// // Import through `FFI_ArrowArrayStream` as `ArrowArrayStreamReader` +// let stream = FFI_ArrowArrayStream::new(reader); +// let stream_reader = ArrowArrayStreamReader::try_new(stream).unwrap(); + +// let imported_schema = stream_reader.schema(); +// assert_eq!(imported_schema, schema); + +// let mut produced_batches = vec![]; +// for batch in stream_reader { +// produced_batches.push(batch.unwrap()); +// } + +// assert_eq!(produced_batches, vec![batch.clone(), batch]); + +// Ok(()) +// } + +// #[test] +// fn test_stream_round_trip_export() -> Result<()> { +// let array = Int32Array::from(vec![Some(2), None, Some(1), None]); +// let array: Arc = Arc::new(array); + +// _test_round_trip_export(vec![array.clone(), array.clone(), array]) +// } + +// #[test] +// fn test_stream_round_trip_import() -> Result<()> { +// let array = Int32Array::from(vec![Some(2), None, Some(1), None]); +// let array: Arc = 
Arc::new(array); + +// _test_round_trip_import(vec![array.clone(), array.clone(), array]) +// } + +// #[test] +// fn test_error_import() -> Result<()> { +// let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)])); + +// let iter = Box::new(vec![Err(ArrowError::MemoryError("".to_string()))].into_iter()); + +// let reader = TestRecordBatchReader::new(schema.clone(), iter); + +// // Import through `FFI_ArrowArrayStream` as `ArrowArrayStreamReader` +// let stream = FFI_ArrowArrayStream::new(reader); +// let stream_reader = ArrowArrayStreamReader::try_new(stream).unwrap(); + +// let imported_schema = stream_reader.schema(); +// assert_eq!(imported_schema, schema); + +// let mut produced_batches = vec![]; +// for batch in stream_reader { +// produced_batches.push(batch); +// } + +// // The results should outlive the lifetime of the stream itself. +// assert_eq!(produced_batches.len(), 1); +// assert!(produced_batches[0].is_err()); + +// Ok(()) +// } +// } diff --git a/src/ffi/table_provider.rs b/src/ffi/table_provider.rs index d667adb2a..d38e0008f 100644 --- a/src/ffi/table_provider.rs +++ b/src/ffi/table_provider.rs @@ -16,15 +16,15 @@ use datafusion::{ common::DFSchema, datasource::TableType, error::DataFusionError, - execution::{context::SessionState, session_state::SessionStateBuilder}, + execution::{context::SessionState, session_state::SessionStateBuilder, TaskContext}, logical_expr::TableProviderFilterPushDown, physical_plan::ExecutionPlan, - prelude::Expr, + prelude::{Expr, SessionContext}, }; use tokio::runtime::Runtime; use super::{ - execution_plan::{ExecutionPlanPrivateData, ExportedExecutionPlan, FFI_ExecutionPlan}, + execution_plan::{ExportedExecutionPlan, FFI_ExecutionPlan}, session_config::{FFI_SessionConfig, SessionConfigPrivateData}, }; use datafusion::error::Result; @@ -81,6 +81,7 @@ unsafe extern "C" fn scan_fn_wrapper( let session = SessionStateBuilder::new() .with_config((*config).config.clone()) .build(); + let ctx = 
SessionContext::new_with_state(session); let num_projections: usize = n_projections.try_into().unwrap_or(0); @@ -101,12 +102,8 @@ unsafe extern "C" fn scan_fn_wrapper( let limit = limit.try_into().ok(); - let plan = ExportedTableProvider(provider).provider_scan( - &session, - maybe_projections, - filters_vec, - limit, - ); + let plan = + ExportedTableProvider(provider).provider_scan(&ctx, maybe_projections, filters_vec, limit); println!("leaving scan_fn_wrapper, has plan? {}", plan.is_ok()); @@ -138,7 +135,7 @@ impl ExportedTableProvider { pub fn provider_scan( &mut self, - session: &SessionState, + ctx: &SessionContext, projections: Option<&Vec>, filters: Vec, limit: Option, @@ -151,13 +148,14 @@ impl ExportedTableProvider { let filter_exprs = filters .into_iter() - .map(|expr_str| session.create_logical_expr(&expr_str, &df_schema)) + .map(|expr_str| ctx.state().create_logical_expr(&expr_str, &df_schema)) .collect::>>()?; let runtime = Runtime::new().unwrap(); - let plan = runtime.block_on(provider.scan(session, projections, &filter_exprs, limit))?; + let plan = + runtime.block_on(provider.scan(&ctx.state(), projections, &filter_exprs, limit))?; - let plan_boxed = Box::new(FFI_ExecutionPlan::new(plan)); + let plan_boxed = Box::new(FFI_ExecutionPlan::new(plan, ctx.task_ctx())); Ok(Box::into_raw(plan_boxed)) } } From 405a89fcd8e2f5fed7256bc9e7ad0996cfc0d600 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 9 Oct 2024 14:34:36 -0400 Subject: [PATCH 11/28] Mimimum viable product demonstrating foreign table provider --- src/ffi/execution_plan.rs | 66 ++++--- src/ffi/record_batch_stream.rs | 310 ++++----------------------------- src/ffi/table_provider.rs | 23 +-- 3 files changed, 71 insertions(+), 328 deletions(-) diff --git a/src/ffi/execution_plan.rs b/src/ffi/execution_plan.rs index 02944ff2b..b04709ae8 100644 --- a/src/ffi/execution_plan.rs +++ b/src/ffi/execution_plan.rs @@ -1,19 +1,15 @@ use std::{ ffi::{c_char, c_void, CString}, - ptr::{null, null_mut}, + 
pin::Pin, + ptr::null_mut, slice, sync::Arc, }; -use arrow::{ - array::RecordBatchReader, - datatypes::Schema, - ffi::{FFI_ArrowArray, FFI_ArrowSchema}, - ffi_stream::FFI_ArrowArrayStream, -}; +use arrow::{datatypes::Schema, ffi::FFI_ArrowSchema, ffi_stream::FFI_ArrowArrayStream}; use datafusion::{ error::DataFusionError, - execution::TaskContext, + execution::{SendableRecordBatchStream, TaskContext}, physical_plan::{DisplayAs, ExecutionMode, ExecutionPlan, PlanProperties}, }; use datafusion::{error::Result, physical_expr::EquivalenceProperties, prelude::SessionContext}; @@ -25,11 +21,9 @@ use datafusion_proto::{ }, protobuf::{Partitioning, PhysicalSortExprNodeCollection}, }; -use futures::{StreamExt, TryStreamExt}; use prost::Message; -use tokio::runtime::Runtime; -use super::record_batch_stream::record_batch_to_arrow_stream; +use super::record_batch_stream::{record_batch_to_arrow_stream, ConsumerRecordBatchStream}; #[repr(C)] #[derive(Debug)] @@ -51,7 +45,7 @@ pub struct FFI_ExecutionPlan { plan: *const FFI_ExecutionPlan, partition: usize, err_code: &mut i32, - ) -> *const FFI_ArrowArrayStream, + ) -> FFI_ArrowArrayStream, pub private_data: *mut c_void, } @@ -91,23 +85,21 @@ unsafe extern "C" fn execute_fn_wrapper( plan: *const FFI_ExecutionPlan, partition: usize, err_code: &mut i32, -) -> *const FFI_ArrowArrayStream { +) -> FFI_ArrowArrayStream { let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; - let mut record_batch_stream = match (*private_data) + let record_batch_stream = match (*private_data) .plan .execute(partition, (*private_data).context.clone()) { Ok(rbs) => rbs, Err(_e) => { *err_code = 1; - return null(); + return FFI_ArrowArrayStream::empty(); } }; - let ffi_stream = Box::new(record_batch_to_arrow_stream(record_batch_stream)); - - Box::into_raw(ffi_stream) + record_batch_to_arrow_stream(record_batch_stream) } unsafe extern "C" fn name_fn_wrapper(plan: *const FFI_ExecutionPlan) -> *const c_char { let private_data = 
(*plan).private_data as *const ExecutionPlanPrivateData; @@ -215,7 +207,7 @@ impl ExportedExecutionPlan { ))?; let mut num_children = 0; let mut err_code = 0; - let mut children_ptr = children_fn(plan, &mut num_children, &mut err_code); + let children_ptr = children_fn(plan, &mut num_children, &mut err_code); println!( "We called the FFI function children so the provider told us we have {} children", @@ -284,25 +276,32 @@ impl ExecutionPlan for ExportedExecutionPlan { self: Arc, children: Vec>, ) -> datafusion::error::Result> { - todo!() + Ok(Arc::new(ExportedExecutionPlan { + plan: self.plan, + name: self.name.clone(), + children, + properties: self.properties.clone(), + })) } fn execute( &self, partition: usize, - context: Arc, + _context: Arc, ) -> datafusion::error::Result { - todo!() - } -} - -impl DisplayAs for FFI_ExecutionPlan { - fn fmt_as( - &self, - t: datafusion::physical_plan::DisplayFormatType, - f: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - todo!() + unsafe { + let execute_fn = (*self.plan).execute; + let mut err_code = 0; + let arrow_stream = execute_fn(self.plan, partition, &mut err_code); + + match err_code { + 0 => ConsumerRecordBatchStream::try_from(arrow_stream) + .map(|v| Pin::new(Box::new(v)) as SendableRecordBatchStream), + _ => Err(DataFusionError::Execution( + "Error occurred during FFI call to FFI_ExecutionPlan execute.".to_string(), + )), + } + } } } @@ -311,9 +310,6 @@ impl DisplayAs for FFI_ExecutionPlan { #[allow(missing_docs)] #[allow(non_camel_case_types)] pub struct FFI_PlanProperties { - // We will build equivalence properties from teh schema and ordersing (new_with_orderings). 
This is how we do ti in dataset_exec - // pub eq_properties: Option EquivalenceProperties>, - // Returns protobuf serialized bytes of the partitioning pub output_partitioning: Option< unsafe extern "C" fn( diff --git a/src/ffi/record_batch_stream.rs b/src/ffi/record_batch_stream.rs index 8fb6cb159..5e1d4ff2a 100644 --- a/src/ffi/record_batch_stream.rs +++ b/src/ffi/record_batch_stream.rs @@ -3,14 +3,21 @@ use std::{ ptr::addr_of, }; -use arrow::array::Array; use arrow::{ array::StructArray, ffi::{FFI_ArrowArray, FFI_ArrowSchema}, ffi_stream::FFI_ArrowArrayStream, }; -use datafusion::execution::SendableRecordBatchStream; -use futures::{executor::block_on, TryStreamExt}; +use arrow::{ + array::{Array, RecordBatch, RecordBatchReader}, + ffi_stream::ArrowArrayStreamReader, +}; +use datafusion::error::Result; +use datafusion::{ + error::DataFusionError, + execution::{RecordBatchStream, SendableRecordBatchStream}, +}; +use futures::{executor::block_on, Stream, TryStreamExt}; pub fn record_batch_to_arrow_stream(stream: SendableRecordBatchStream) -> FFI_ArrowArrayStream { let private_data = Box::new(RecoredBatchStreamPrivateData { @@ -32,11 +39,6 @@ struct RecoredBatchStreamPrivateData { last_error: Option, } -const ENOMEM: i32 = 12; -const EIO: i32 = 5; -const EINVAL: i32 = 22; -const ENOSYS: i32 = 78; - // callback used to drop [FFI_ArrowArrayStream] when it is exported. unsafe extern "C" fn release_stream(stream: *mut FFI_ArrowArrayStream) { if stream.is_null() { @@ -143,274 +145,38 @@ impl ExportedRecordBatchStream { } } -// /// A `RecordBatchReader` which imports Arrays from `FFI_ArrowArrayStream`. -// /// Struct used to fetch `RecordBatch` from the C Stream Interface. -// /// Its main responsibility is to expose `RecordBatchReader` functionality -// /// that requires [FFI_ArrowArrayStream]. 
-// #[derive(Debug)] -// pub struct ArrowArrayStreamReader { -// stream: FFI_ArrowArrayStream, -// schema: SchemaRef, -// } - -// /// Gets schema from a raw pointer of `FFI_ArrowArrayStream`. This is used when constructing -// /// `ArrowArrayStreamReader` to cache schema. -// fn get_stream_schema(stream_ptr: *mut FFI_ArrowArrayStream) -> Result { -// let mut schema = FFI_ArrowSchema::empty(); - -// let ret_code = unsafe { (*stream_ptr).get_schema.unwrap()(stream_ptr, &mut schema) }; - -// if ret_code == 0 { -// let schema = Schema::try_from(&schema)?; -// Ok(Arc::new(schema)) -// } else { -// Err(ArrowError::CDataInterface(format!( -// "Cannot get schema from input stream. Error code: {ret_code:?}" -// ))) -// } -// } - -// impl ArrowArrayStreamReader { -// /// Creates a new `ArrowArrayStreamReader` from a `FFI_ArrowArrayStream`. -// /// This is used to import from the C Stream Interface. -// #[allow(dead_code)] -// pub fn try_new(mut stream: FFI_ArrowArrayStream) -> Result { -// if stream.release.is_none() { -// return Err(ArrowError::CDataInterface( -// "input stream is already released".to_string(), -// )); -// } - -// let schema = get_stream_schema(&mut stream)?; - -// Ok(Self { stream, schema }) -// } - -// /// Creates a new `ArrowArrayStreamReader` from a raw pointer of `FFI_ArrowArrayStream`. -// /// -// /// Assumes that the pointer represents valid C Stream Interfaces. -// /// This function copies the content from the raw pointer and cleans up it to prevent -// /// double-dropping. The caller is responsible for freeing up the memory allocated for -// /// the pointer. 
-// /// -// /// # Safety -// /// -// /// See [`FFI_ArrowArrayStream::from_raw`] -// pub unsafe fn from_raw(raw_stream: *mut FFI_ArrowArrayStream) -> Result { -// Self::try_new(FFI_ArrowArrayStream::from_raw(raw_stream)) -// } - -// /// Get the last error from `ArrowArrayStreamReader` -// fn get_stream_last_error(&mut self) -> Option { -// let get_last_error = self.stream.get_last_error?; - -// let error_str = unsafe { get_last_error(&mut self.stream) }; -// if error_str.is_null() { -// return None; -// } - -// let error_str = unsafe { CStr::from_ptr(error_str) }; -// Some(error_str.to_string_lossy().to_string()) -// } -// } - -// impl Iterator for ArrowArrayStreamReader { -// type Item = Result; - -// fn next(&mut self) -> Option { -// let mut array = FFI_ArrowArray::empty(); - -// let ret_code = unsafe { self.stream.get_next.unwrap()(&mut self.stream, &mut array) }; - -// if ret_code == 0 { -// // The end of stream has been reached -// if array.is_released() { -// return None; -// } - -// let result = unsafe { -// from_ffi_and_data_type(array, DataType::Struct(self.schema().fields().clone())) -// }; -// Some(result.map(|data| RecordBatch::from(StructArray::from(data)))) -// } else { -// let last_error = self.get_stream_last_error(); -// let err = ArrowError::CDataInterface(last_error.unwrap()); -// Some(Err(err)) -// } -// } -// } - -// impl RecordBatchReader for ArrowArrayStreamReader { -// fn schema(&self) -> SchemaRef { -// self.schema.clone() -// } -// } - -// /// Exports a record batch reader to raw pointer of the C Stream Interface provided by the consumer. -// /// -// /// # Safety -// /// Assumes that the pointer represents valid C Stream Interfaces, both in memory -// /// representation and lifetime via the `release` mechanism. 
-// #[deprecated(note = "Use FFI_ArrowArrayStream::new")] -// pub unsafe fn export_reader_into_raw( -// reader: Box, -// out_stream: *mut FFI_ArrowArrayStream, -// ) { -// let stream = FFI_ArrowArrayStream::new(reader); - -// std::ptr::write_unaligned(out_stream, stream); -// } - -// #[cfg(test)] -// mod tests { -// use super::*; - -// use arrow_schema::Field; - -// use crate::array::Int32Array; -// use crate::ffi::from_ffi; - -// struct TestRecordBatchReader { -// schema: SchemaRef, -// iter: Box> + Send>, -// } - -// impl TestRecordBatchReader { -// pub fn new( -// schema: SchemaRef, -// iter: Box> + Send>, -// ) -> Box { -// Box::new(TestRecordBatchReader { schema, iter }) -// } -// } - -// impl Iterator for TestRecordBatchReader { -// type Item = Result; - -// fn next(&mut self) -> Option { -// self.iter.next() -// } -// } - -// impl RecordBatchReader for TestRecordBatchReader { -// fn schema(&self) -> SchemaRef { -// self.schema.clone() -// } -// } - -// fn _test_round_trip_export(arrays: Vec>) -> Result<()> { -// let schema = Arc::new(Schema::new(vec![ -// Field::new("a", arrays[0].data_type().clone(), true), -// Field::new("b", arrays[1].data_type().clone(), true), -// Field::new("c", arrays[2].data_type().clone(), true), -// ])); -// let batch = RecordBatch::try_new(schema.clone(), arrays).unwrap(); -// let iter = Box::new(vec![batch.clone(), batch.clone()].into_iter().map(Ok)) as _; - -// let reader = TestRecordBatchReader::new(schema.clone(), iter); - -// // Export a `RecordBatchReader` through `FFI_ArrowArrayStream` -// let mut ffi_stream = FFI_ArrowArrayStream::new(reader); - -// // Get schema from `FFI_ArrowArrayStream` -// let mut ffi_schema = FFI_ArrowSchema::empty(); -// let ret_code = unsafe { get_schema(&mut ffi_stream, &mut ffi_schema) }; -// assert_eq!(ret_code, 0); - -// let exported_schema = Schema::try_from(&ffi_schema).unwrap(); -// assert_eq!(&exported_schema, schema.as_ref()); - -// // Get array from `FFI_ArrowArrayStream` -// let mut 
produced_batches = vec![]; -// loop { -// let mut ffi_array = FFI_ArrowArray::empty(); -// let ret_code = unsafe { get_next(&mut ffi_stream, &mut ffi_array) }; -// assert_eq!(ret_code, 0); - -// // The end of stream has been reached -// if ffi_array.is_released() { -// break; -// } - -// let array = unsafe { from_ffi(ffi_array, &ffi_schema) }.unwrap(); - -// let record_batch = RecordBatch::from(StructArray::from(array)); -// produced_batches.push(record_batch); -// } - -// assert_eq!(produced_batches, vec![batch.clone(), batch]); - -// Ok(()) -// } - -// fn _test_round_trip_import(arrays: Vec>) -> Result<()> { -// let schema = Arc::new(Schema::new(vec![ -// Field::new("a", arrays[0].data_type().clone(), true), -// Field::new("b", arrays[1].data_type().clone(), true), -// Field::new("c", arrays[2].data_type().clone(), true), -// ])); -// let batch = RecordBatch::try_new(schema.clone(), arrays).unwrap(); -// let iter = Box::new(vec![batch.clone(), batch.clone()].into_iter().map(Ok)) as _; - -// let reader = TestRecordBatchReader::new(schema.clone(), iter); - -// // Import through `FFI_ArrowArrayStream` as `ArrowArrayStreamReader` -// let stream = FFI_ArrowArrayStream::new(reader); -// let stream_reader = ArrowArrayStreamReader::try_new(stream).unwrap(); - -// let imported_schema = stream_reader.schema(); -// assert_eq!(imported_schema, schema); - -// let mut produced_batches = vec![]; -// for batch in stream_reader { -// produced_batches.push(batch.unwrap()); -// } - -// assert_eq!(produced_batches, vec![batch.clone(), batch]); - -// Ok(()) -// } - -// #[test] -// fn test_stream_round_trip_export() -> Result<()> { -// let array = Int32Array::from(vec![Some(2), None, Some(1), None]); -// let array: Arc = Arc::new(array); - -// _test_round_trip_export(vec![array.clone(), array.clone(), array]) -// } - -// #[test] -// fn test_stream_round_trip_import() -> Result<()> { -// let array = Int32Array::from(vec![Some(2), None, Some(1), None]); -// let array: Arc = 
Arc::new(array); - -// _test_round_trip_import(vec![array.clone(), array.clone(), array]) -// } - -// #[test] -// fn test_error_import() -> Result<()> { -// let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)])); +pub struct ConsumerRecordBatchStream { + reader: ArrowArrayStreamReader, +} -// let iter = Box::new(vec![Err(ArrowError::MemoryError("".to_string()))].into_iter()); +impl TryFrom for ConsumerRecordBatchStream { + type Error = DataFusionError; -// let reader = TestRecordBatchReader::new(schema.clone(), iter); + fn try_from(value: FFI_ArrowArrayStream) -> std::result::Result { + let reader = ArrowArrayStreamReader::try_new(value)?; -// // Import through `FFI_ArrowArrayStream` as `ArrowArrayStreamReader` -// let stream = FFI_ArrowArrayStream::new(reader); -// let stream_reader = ArrowArrayStreamReader::try_new(stream).unwrap(); + Ok(Self { reader }) + } +} -// let imported_schema = stream_reader.schema(); -// assert_eq!(imported_schema, schema); +impl RecordBatchStream for ConsumerRecordBatchStream { + fn schema(&self) -> arrow::datatypes::SchemaRef { + self.reader.schema() + } +} -// let mut produced_batches = vec![]; -// for batch in stream_reader { -// produced_batches.push(batch); -// } +impl Stream for ConsumerRecordBatchStream { + type Item = Result; -// // The results should outlive the lifetime of the stream itself. 
-// assert_eq!(produced_batches.len(), 1); -// assert!(produced_batches[0].is_err()); + fn poll_next( + mut self: std::pin::Pin<&mut Self>, + _cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + let batch = self + .reader + .next() + .map(|v| v.map_err(|e| DataFusionError::ArrowError(e, None))); -// Ok(()) -// } -// } + std::task::Poll::Ready(batch) + } +} diff --git a/src/ffi/table_provider.rs b/src/ffi/table_provider.rs index d38e0008f..85d15a142 100644 --- a/src/ffi/table_provider.rs +++ b/src/ffi/table_provider.rs @@ -7,7 +7,6 @@ use std::{ use arrow::{ datatypes::{Schema, SchemaRef}, - error::ArrowError, ffi::FFI_ArrowSchema, }; use async_trait::async_trait; @@ -16,7 +15,7 @@ use datafusion::{ common::DFSchema, datasource::TableType, error::DataFusionError, - execution::{context::SessionState, session_state::SessionStateBuilder, TaskContext}, + execution::session_state::SessionStateBuilder, logical_expr::TableProviderFilterPushDown, physical_plan::ExecutionPlan, prelude::{Expr, SessionContext}, @@ -56,7 +55,6 @@ unsafe impl Sync for FFI_TableProvider {} struct ProviderPrivateData { provider: Box, - last_error: Option, } struct ExportedTableProvider(*const FFI_TableProvider); @@ -160,27 +158,10 @@ impl ExportedTableProvider { } } -const ENOMEM: i32 = 12; -const EIO: i32 = 5; -const EINVAL: i32 = 22; -const ENOSYS: i32 = 78; - -fn get_error_code(err: &ArrowError) -> i32 { - match err { - ArrowError::NotYetImplemented(_) => ENOSYS, - ArrowError::MemoryError(_) => ENOMEM, - ArrowError::IoError(_, _) => EIO, - _ => EINVAL, - } -} - impl FFI_TableProvider { /// Creates a new [`FFI_TableProvider`]. 
pub fn new(provider: Box) -> Self { - let private_data = Box::new(ProviderPrivateData { - provider, - last_error: None, - }); + let private_data = Box::new(ProviderPrivateData { provider }); Self { version: 2, From d1223930934068844e1ce994ff90004c7b2e8ccb Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 14 Oct 2024 11:07:12 -0400 Subject: [PATCH 12/28] Move ffi module to datafusion core --- Cargo.lock | 631 ++++++++++++++++++++++++++++----- Cargo.toml | 1 + src/context.rs | 6 +- src/ffi/execution_plan.rs | 573 ------------------------------ src/ffi/mod.rs | 21 -- src/ffi/record_batch_stream.rs | 182 ---------- src/ffi/session_config.rs | 49 --- src/ffi/table_provider.rs | 297 ---------------- src/lib.rs | 2 - 9 files changed, 546 insertions(+), 1216 deletions(-) delete mode 100644 src/ffi/execution_plan.rs delete mode 100644 src/ffi/mod.rs delete mode 100644 src/ffi/record_batch_stream.rs delete mode 100644 src/ffi/session_config.rs delete mode 100644 src/ffi/table_provider.rs diff --git a/Cargo.lock b/Cargo.lock index 497c5b850..7b57b330a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,54 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "abi_stable" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69d6512d3eb05ffe5004c59c206de7f99c34951504056ce23fc953842f12c445" +dependencies = [ + "abi_stable_derive", + "abi_stable_shared", + "const_panic", + "core_extensions", + "crossbeam-channel", + "generational-arena", + "libloading", + "lock_api", + "parking_lot", + "paste", + "repr_offset", + "rustc_version", + "serde", + "serde_derive", + "serde_json", +] + +[[package]] +name = "abi_stable_derive" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7178468b407a4ee10e881bc7a328a65e739f0863615cca4429d43916b05e898" +dependencies = [ + "abi_stable_shared", + "as_derive_utils", + "core_extensions", + "proc-macro2", + "quote", + "rustc_version", + "syn 1.0.109", + "typed-arena", +] + +[[package]] +name = "abi_stable_shared" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2b5df7688c123e63f4d4d649cba63f2967ba7f7861b1664fca3f77d3dad2b63" +dependencies = [ + "core_extensions", +] + [[package]] name = "addr2line" version = "0.24.2" @@ -63,9 +111,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" +checksum = "611cc2ae7d2e242c457e4be7f97036b8ad9ca152b499f53faf99b1ed8fc2553f" [[package]] name = "android-tzdata" @@ -84,9 +132,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.91" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c042108f3ed77fd83760a5fd79b53be043192bb3b9dba91d8c574c0ada7850c8" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" [[package]] name = "apache-avro" @@ -349,6 +397,18 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = 
"as_derive_utils" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff3c96645900a44cf11941c111bd08a6573b0e2f9f69bc9264b179d8fae753c4" +dependencies = [ + "core_extensions", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "async-compression" version = "0.4.17" @@ -367,6 +427,15 @@ dependencies = [ "zstd-safe 7.2.1", ] +[[package]] +name = "async-ffi" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4de21c0feef7e5a556e51af767c953f0501f7f300ba785cc99c47bdc8081a50" +dependencies = [ + "abi_stable", +] + [[package]] name = "async-recursion" version = "1.1.1" @@ -375,7 +444,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -386,7 +455,7 @@ checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -542,9 +611,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.31" +version = "1.1.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f" +checksum = "40545c26d092346d8a8dab71ee48e7685a7a9cba76e634790c215b41a4a7b4cf" dependencies = [ "jobserver", "libc", @@ -637,6 +706,12 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "const_panic" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "013b6c2c3a14d678f38cd23994b02da3a1a1b6a5d1eedddfe63a5a5f11b13a81" + [[package]] name = "constant_time_eq" version = "0.3.1" @@ -668,6 +743,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "core_extensions" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92c71dc07c9721607e7a16108336048ee978c3a8b129294534272e8bac96c0ee" +dependencies = [ + 
"core_extensions_proc_macros", +] + +[[package]] +name = "core_extensions_proc_macros" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f3b219d28b6e3b4ac87bc1fc522e0803ab22e055da177bff0068c4150c61a6" + [[package]] name = "cpufeatures" version = "0.2.14" @@ -686,6 +776,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.20" @@ -917,6 +1016,24 @@ dependencies = [ "paste", ] +[[package]] +name = "datafusion-ffi" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e923c459b53a26d92a8806d1f6a37fdf48bde51507a39eaed6f42a60f2bfd160" +dependencies = [ + "abi_stable", + "arrow", + "async-ffi", + "async-trait", + "datafusion", + "datafusion-proto", + "doc-comment", + "futures", + "log", + "prost", +] + [[package]] name = "datafusion-functions" version = "43.0.0" @@ -1176,6 +1293,7 @@ dependencies = [ "arrow", "async-trait", "datafusion", + "datafusion-ffi", "datafusion-functions-window-common", "datafusion-proto", "datafusion-substrait", @@ -1238,6 +1356,23 @@ dependencies = [ "subtle", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + [[package]] name = "dyn-clone" version = "1.0.17" @@ -1268,9 +1403,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.1.1" +version = "2.2.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" +checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" [[package]] name = "fixedbitset" @@ -1369,7 +1504,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -1402,6 +1537,15 @@ dependencies = [ "slab", ] +[[package]] +name = "generational-arena" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877e94aff08e743b651baaea359664321055749b398adff8740a7399af7796e7" +dependencies = [ + "cfg-if", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -1477,9 +1621,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" [[package]] name = "heck" @@ -1631,14 +1775,143 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", 
+ "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "idna" -version = "0.5.0" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "icu_normalizer", + "icu_properties", ] [[package]] @@ -1648,7 +1921,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown 0.15.0", + "hashbrown 0.15.1", ] [[package]] @@ -1786,9 +2059,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.161" +version = "0.2.162" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" +checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" [[package]] name = "libflate" @@ -1814,6 +2087,16 @@ dependencies = [ "rle-decode-fast", ] +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + [[package]] name = "libm" version = "0.2.11" @@ -1836,6 +2119,12 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = 
"0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + [[package]] name = "lock_api" version = "0.4.12" @@ -2289,7 +2578,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" dependencies = [ "proc-macro2", - "syn", + "syn 2.0.87", ] [[package]] @@ -2328,7 +2617,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn", + "syn 2.0.87", "tempfile", ] @@ -2342,7 +2631,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -2365,9 +2654,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.22.5" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d922163ba1f79c04bc49073ba7b32fd5a8d3b76a87c955921234b8e77333c51" +checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" dependencies = [ "cfg-if", "indoc", @@ -2383,9 +2672,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.22.5" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc38c5feeb496c8321091edf3d63e9a6829eab4b863b4a6a65f26f3e9cc6b179" +checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" dependencies = [ "once_cell", "target-lexicon", @@ -2393,9 +2682,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.22.5" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94845622d88ae274d2729fcefc850e63d7a3ddff5e3ce11bd88486db9f1d357d" +checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" dependencies = [ "libc", "pyo3-build-config", @@ -2403,34 +2692,34 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.5" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e655aad15e09b94ffdb3ce3d217acf652e26bbc37697ef012f5e5e348c716e5e" +checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn", + "syn 2.0.87", ] [[package]] name = "pyo3-macros-backend" -version = "0.22.5" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae1e3f09eecd94618f60a455a23def79f79eba4dc561a97324bf9ac8c6df30ce" +checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" dependencies = [ "heck 0.5.0", "proc-macro2", "pyo3-build-config", "quote", - "syn", + "syn 2.0.87", ] [[package]] name = "quad-rand" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b76f1009795ca44bb5aaae8fd3f18953e209259c33d9b059b1f53d58ab7511db" +checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" [[package]] name = "quick-xml" @@ -2479,9 +2768,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.6" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e346e016eacfff12233c243718197ca12f148c84e1e84268a896699b41c71780" +checksum = "7d5a626c6807713b15cac82a6acaccd6043c9a5408c24baae07611fec3f243da" dependencies = [ "cfg_aliases", "libc", @@ -2584,6 +2873,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "repr_offset" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb1070755bd29dffc19d0971cab794e607839ba2ef4b69a9e6fbc8733c1b72ea" +dependencies = [ + "tstr", +] + [[package]] name = "reqwest" version = "0.12.9" @@ -2673,9 +2971,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.38" +version = "0.38.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa260229e6538e52293eeb577aabd09945a09d6d9cc0fc550ed7529056c2e32a" +checksum = 
"375116bee2be9ed569afe2154ea6a99dfdffd257f533f187498c2a8f5feaf4ee" dependencies = [ "bitflags 2.6.0", "errno", @@ -2788,7 +3086,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn", + "syn 2.0.87", ] [[package]] @@ -2812,9 +3110,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.12.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea4a292869320c0272d7bc55a5a6aafaff59b4f63404a003887b679a2e05b4b6" +checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2" dependencies = [ "core-foundation-sys", "libc", @@ -2852,7 +3150,7 @@ checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -2863,7 +3161,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -2887,7 +3185,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn", + "syn 2.0.87", ] [[package]] @@ -2971,7 +3269,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -3014,9 +3312,15 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "static_assertions" version = "1.1.0" @@ -3048,7 +3352,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 2.0.87", ] [[package]] @@ -3061,7 +3365,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 2.0.87", ] [[package]] @@ -3085,7 +3389,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn", + "syn 2.0.87", "typify", 
"walkdir", ] @@ -3098,9 +3402,20 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.85" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", @@ -3116,6 +3431,17 @@ dependencies = [ "futures-core", ] +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "target-lexicon" version = "0.12.16" @@ -3124,9 +3450,9 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", "fastrand", @@ -3137,22 +3463,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.65" +version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d11abd9594d9b38965ef50805c5e469ca9cc6f197f883f717e0269a3057b3d5" +checksum = "02dd99dc800bbb97186339685293e1cc5d9df1f8fae2d0aecd9ff1c77efea892" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.65" +version = "1.0.68" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602" +checksum = "a7c61ec9a6f64d2793d8a45faba21efbe3ced62a886d44c36a009b2b519b4c7e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -3175,6 +3501,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinyvec" version = "1.8.0" @@ -3192,9 +3528,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.41.0" +version = "1.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145f3413504347a2be84393cc8a7d2fb4d863b375909ea59f2158261aa258bbb" +checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" dependencies = [ "backtrace", "bytes", @@ -3214,7 +3550,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -3266,7 +3602,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -3284,6 +3620,21 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "tstr" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f8e0294f14baae476d0dd0a2d780b2e24d66e349a9de876f5126777a37bdba7" +dependencies = [ + "tstr_proc_macros", +] + +[[package]] +name = "tstr_proc_macros" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e78122066b0cb818b8afd08f7ed22f7fdbc3e90815035726f0840d0d26c0747a" + [[package]] name = "twox-hash" version = "1.6.3" @@ -3294,6 +3645,12 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "typed-arena" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + [[package]] name = "typed-builder" version = "0.16.2" @@ -3311,7 +3668,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -3345,7 +3702,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn", + "syn 2.0.87", "thiserror", "unicode-ident", ] @@ -3363,31 +3720,16 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn", + "syn 2.0.87", "typify-impl", ] -[[package]] -name = "unicode-bidi" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" - [[package]] name = "unicode-ident" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" -[[package]] -name = "unicode-normalization" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" -dependencies = [ - "tinyvec", -] - [[package]] name = "unicode-segmentation" version = "1.12.0" @@ -3420,15 +3762,27 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.2" +version = "2.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" dependencies = [ 
"form_urlencoded", "idna", "percent-encoding", ] +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "uuid" version = "1.11.0" @@ -3492,7 +3846,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 2.0.87", "wasm-bindgen-shared", ] @@ -3526,7 +3880,7 @@ checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3560,6 +3914,22 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.9" @@ -3569,6 +3939,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-core" version = "0.52.0" @@ -3690,6 +4066,18 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "write16" 
+version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "xz2" version = "0.1.7" @@ -3699,6 +4087,30 @@ dependencies = [ "lzma-sys", ] +[[package]] +name = "yoke" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.35" @@ -3717,7 +4129,28 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", +] + +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", ] [[package]] @@ -3726,6 +4159,28 @@ version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerovec" 
+version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "zstd" version = "0.12.4" diff --git a/Cargo.toml b/Cargo.toml index 11ce08c75..f28830843 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,7 @@ datafusion = { version = "43.0.0", features = ["pyarrow", "avro", "unicode_expre datafusion-substrait = { version = "43.0.0", optional = true } datafusion-proto = { version = "43.0.0" } datafusion-functions-window-common = { version = "43.0.0" } +datafusion-ffi = { version = "43.0.0" } prost = "0.13" # keep in line with `datafusion-substrait` uuid = { version = "1.11", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } diff --git a/src/context.rs b/src/context.rs index cc01b3217..9fcb84845 100644 --- a/src/context.rs +++ b/src/context.rs @@ -38,7 +38,6 @@ use crate::dataset::Dataset; use crate::errors::{py_datafusion_err, DataFusionError}; use crate::expr::sort_expr::PySortExpr; use crate::expr::PyExpr; -use crate::ffi::table_provider::FFI_TableProvider; use crate::physical_plan::PyExecutionPlan; use crate::record_batch::PyRecordBatchStream; use crate::sql::logical::PyLogicalPlan; @@ -70,6 +69,7 @@ use datafusion::physical_plan::SendableRecordBatchStream; use datafusion::prelude::{ AvroReadOptions, CsvReadOptions, DataFrame, NdJsonReadOptions, ParquetReadOptions, }; +use datafusion_ffi::table_provider::{FFI_TableProvider, ForeignTableProvider}; use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple}; use tokio::task::JoinHandle; @@ 
-582,9 +582,7 @@ impl PySessionContext { // validate_pycapsule(capsule, "arrow_array_stream")?; let provider = unsafe { FFI_TableProvider::from_raw(capsule.pointer() as _) }; - - println!("Found provider version {}", provider.version); - + let provider = ForeignTableProvider::new(provider); let schema = provider.schema(); println!("Got schema through TableProvider trait."); diff --git a/src/ffi/execution_plan.rs b/src/ffi/execution_plan.rs deleted file mode 100644 index b04709ae8..000000000 --- a/src/ffi/execution_plan.rs +++ /dev/null @@ -1,573 +0,0 @@ -use std::{ - ffi::{c_char, c_void, CString}, - pin::Pin, - ptr::null_mut, - slice, - sync::Arc, -}; - -use arrow::{datatypes::Schema, ffi::FFI_ArrowSchema, ffi_stream::FFI_ArrowArrayStream}; -use datafusion::{ - error::DataFusionError, - execution::{SendableRecordBatchStream, TaskContext}, - physical_plan::{DisplayAs, ExecutionMode, ExecutionPlan, PlanProperties}, -}; -use datafusion::{error::Result, physical_expr::EquivalenceProperties, prelude::SessionContext}; -use datafusion_proto::{ - physical_plan::{ - from_proto::{parse_physical_sort_exprs, parse_protobuf_partitioning}, - to_proto::{serialize_partitioning, serialize_physical_sort_exprs}, - DefaultPhysicalExtensionCodec, - }, - protobuf::{Partitioning, PhysicalSortExprNodeCollection}, -}; -use prost::Message; - -use super::record_batch_stream::{record_batch_to_arrow_stream, ConsumerRecordBatchStream}; - -#[repr(C)] -#[derive(Debug)] -#[allow(missing_docs)] -#[allow(non_camel_case_types)] -pub struct FFI_ExecutionPlan { - pub properties: - Option FFI_PlanProperties>, - pub children: Option< - unsafe extern "C" fn( - plan: *const FFI_ExecutionPlan, - num_children: &mut usize, - err_code: &mut i32, - ) -> *mut *const FFI_ExecutionPlan, - >, - pub name: unsafe extern "C" fn(plan: *const FFI_ExecutionPlan) -> *const c_char, - - pub execute: unsafe extern "C" fn( - plan: *const FFI_ExecutionPlan, - partition: usize, - err_code: &mut i32, - ) -> 
FFI_ArrowArrayStream, - - pub private_data: *mut c_void, -} - -pub struct ExecutionPlanPrivateData { - pub plan: Arc, - pub last_error: Option, - pub children: Vec<*const FFI_ExecutionPlan>, - pub context: Arc, -} - -unsafe extern "C" fn properties_fn_wrapper(plan: *const FFI_ExecutionPlan) -> FFI_PlanProperties { - let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; - let properties = (*private_data).plan.properties(); - properties.clone().into() -} - -unsafe extern "C" fn children_fn_wrapper( - plan: *const FFI_ExecutionPlan, - num_children: &mut usize, - err_code: &mut i32, -) -> *mut *const FFI_ExecutionPlan { - let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; - - *num_children = (*private_data).children.len(); - *err_code = 0; - - let mut children: Vec<_> = (*private_data).children.to_owned(); - let children_ptr = children.as_mut_ptr(); - - std::mem::forget(children); - - children_ptr -} - -unsafe extern "C" fn execute_fn_wrapper( - plan: *const FFI_ExecutionPlan, - partition: usize, - err_code: &mut i32, -) -> FFI_ArrowArrayStream { - let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; - - let record_batch_stream = match (*private_data) - .plan - .execute(partition, (*private_data).context.clone()) - { - Ok(rbs) => rbs, - Err(_e) => { - *err_code = 1; - return FFI_ArrowArrayStream::empty(); - } - }; - - record_batch_to_arrow_stream(record_batch_stream) -} -unsafe extern "C" fn name_fn_wrapper(plan: *const FFI_ExecutionPlan) -> *const c_char { - let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; - - let name = (*private_data).plan.name(); - - CString::new(name) - .unwrap_or(CString::new("unable to parse execution plan name").unwrap()) - .into_raw() -} - -// Since the trait ExecutionPlan requires borrowed values, we wrap our FFI. -// This struct exists on the consumer side (datafusion-python, for example) and not -// in the provider's side. 
-#[derive(Debug)] -pub struct ExportedExecutionPlan { - name: String, - plan: *const FFI_ExecutionPlan, - properties: PlanProperties, - children: Vec>, -} - -unsafe impl Send for ExportedExecutionPlan {} -unsafe impl Sync for ExportedExecutionPlan {} - -impl DisplayAs for ExportedExecutionPlan { - fn fmt_as( - &self, - _t: datafusion::physical_plan::DisplayFormatType, - f: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - write!( - f, - "FFI_ExecutionPlan(number_of_children={})", - self.children.len(), - ) - } -} - -impl FFI_ExecutionPlan { - /// This function is called on the provider's side. - pub fn new(plan: Arc, context: Arc) -> Self { - let children = plan - .children() - .into_iter() - .map(|child| Box::new(FFI_ExecutionPlan::new(child.clone(), context.clone()))) - .map(|child| Box::into_raw(child) as *const FFI_ExecutionPlan) - .collect(); - println!("children collected"); - - let private_data = Box::new(ExecutionPlanPrivateData { - plan, - children, - context, - last_error: None, - }); - println!("generated private data, ready to return"); - - Self { - properties: Some(properties_fn_wrapper), - children: Some(children_fn_wrapper), - name: name_fn_wrapper, - execute: execute_fn_wrapper, - private_data: Box::into_raw(private_data) as *mut c_void, - } - } - - // pub fn empty() -> Self { - // Self { - // properties: None, - // children: None, - // private_data: std::ptr::null_mut(), - // } - // } -} - -impl ExportedExecutionPlan { - /// Wrap a FFI Execution Plan - /// - /// # Safety - /// - /// The caller must ensure the pointer provided points to a valid implementation - /// of FFI_ExecutionPlan - pub unsafe fn new(plan: *const FFI_ExecutionPlan) -> Result { - let name_fn = (*plan).name; - let name_cstr = name_fn(plan); - let name = CString::from_raw(name_cstr as *mut c_char) - .to_str() - .unwrap_or("Unable to parse FFI_ExecutionPlan name") - .to_string(); - - println!("entered ExportedExecutionPlan::new"); - let properties = unsafe { - let properties_fn 
= (*plan).properties.ok_or(DataFusionError::NotImplemented( - "properties not implemented on FFI_ExecutionPlan".to_string(), - ))?; - println!("About to call properties fn"); - properties_fn(plan).try_into()? - }; - - println!("created properties"); - let children = unsafe { - let children_fn = (*plan).children.ok_or(DataFusionError::NotImplemented( - "children not implemented on FFI_ExecutionPlan".to_string(), - ))?; - let mut num_children = 0; - let mut err_code = 0; - let children_ptr = children_fn(plan, &mut num_children, &mut err_code); - - println!( - "We called the FFI function children so the provider told us we have {} children", - num_children - ); - - if err_code != 0 { - return Err(DataFusionError::Plan( - "Error getting children for FFI_ExecutionPlan".to_string(), - )); - } - - let ffi_vec = Vec::from_raw_parts(children_ptr, num_children, num_children); - let maybe_children: Result> = ffi_vec - .into_iter() - .map(|child| { - println!("Ok, we are about to examine a child ffi_executionplan"); - if let Some(props_fn) = (*child).properties { - println!("We do have properties on the child "); - let child_props = props_fn(child); - println!("Child schema {:?}", child_props.schema); - } - - let child_plan = ExportedExecutionPlan::new(child); - - child_plan.map(|c| Arc::new(c) as Arc) - }) - .collect(); - println!("finsihed maybe children"); - - maybe_children? 
- }; - - println!("About to return ExportedExecurtionPlan"); - - Ok(Self { - name, - plan, - properties, - children, - }) - } -} - -impl ExecutionPlan for ExportedExecutionPlan { - fn name(&self) -> &str { - &self.name - } - - fn as_any(&self) -> &dyn std::any::Any { - self - } - - fn properties(&self) -> &datafusion::physical_plan::PlanProperties { - &self.properties - } - - fn children(&self) -> Vec<&Arc> { - self.children - .iter() - .map(|p| p as &Arc) - .collect() - } - - fn with_new_children( - self: Arc, - children: Vec>, - ) -> datafusion::error::Result> { - Ok(Arc::new(ExportedExecutionPlan { - plan: self.plan, - name: self.name.clone(), - children, - properties: self.properties.clone(), - })) - } - - fn execute( - &self, - partition: usize, - _context: Arc, - ) -> datafusion::error::Result { - unsafe { - let execute_fn = (*self.plan).execute; - let mut err_code = 0; - let arrow_stream = execute_fn(self.plan, partition, &mut err_code); - - match err_code { - 0 => ConsumerRecordBatchStream::try_from(arrow_stream) - .map(|v| Pin::new(Box::new(v)) as SendableRecordBatchStream), - _ => Err(DataFusionError::Execution( - "Error occurred during FFI call to FFI_ExecutionPlan execute.".to_string(), - )), - } - } - } -} - -#[repr(C)] -#[derive(Debug)] -#[allow(missing_docs)] -#[allow(non_camel_case_types)] -pub struct FFI_PlanProperties { - // Returns protobuf serialized bytes of the partitioning - pub output_partitioning: Option< - unsafe extern "C" fn( - plan: *const FFI_PlanProperties, - buffer_size: &mut usize, - buffer_bytes: &mut *mut u8, - ) -> i32, - >, - - pub execution_mode: - Option FFI_ExecutionMode>, - - // PhysicalSortExprNodeCollection proto - pub output_ordering: Option< - unsafe extern "C" fn( - plan: *const FFI_PlanProperties, - buffer_size: &mut usize, - buffer_bytes: &mut *mut u8, - ) -> i32, - >, - - pub schema: Option FFI_ArrowSchema>, - - pub private_data: *mut c_void, -} - -unsafe extern "C" fn output_partitioning_fn_wrapper( - properties: 
*const FFI_PlanProperties, - buffer_size: &mut usize, - buffer_bytes: &mut *mut u8, -) -> i32 { - // let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; - // let properties = (*private_data).plan.properties(); - // properties.clone().into() - let private_data = (*properties).private_data as *const PlanProperties; - let partitioning = (*private_data).output_partitioning(); - - let codec = DefaultPhysicalExtensionCodec {}; - let partitioning_data = match serialize_partitioning(partitioning, &codec) { - Ok(p) => p, - Err(_) => return 1, - }; - - let mut partition_bytes = partitioning_data.encode_to_vec(); - *buffer_size = partition_bytes.len(); - *buffer_bytes = partition_bytes.as_mut_ptr(); - - std::mem::forget(partition_bytes); - - 0 -} - -unsafe extern "C" fn execution_mode_fn_wrapper( - properties: *const FFI_PlanProperties, -) -> FFI_ExecutionMode { - // let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; - // let properties = (*private_data).plan.properties(); - // properties.clone().into() - let private_data = (*properties).private_data as *const PlanProperties; - let execution_mode = (*private_data).execution_mode(); - - execution_mode.into() -} - -unsafe extern "C" fn output_ordering_fn_wrapper( - properties: *const FFI_PlanProperties, - buffer_size: &mut usize, - buffer_bytes: &mut *mut u8, -) -> i32 { - // let private_data = (*plan).private_data as *const ExecutionPlanPrivateData; - // let properties = (*private_data).plan.properties(); - // properties.clone().into() - let private_data = (*properties).private_data as *const PlanProperties; - let output_ordering = match (*private_data).output_ordering() { - Some(o) => o, - None => { - *buffer_size = 0; - return 0; - } - } - .to_owned(); - - let codec = DefaultPhysicalExtensionCodec {}; - let physical_sort_expr_nodes = match serialize_physical_sort_exprs(output_ordering, &codec) { - Ok(p) => p, - Err(_) => return 1, - }; - - let ordering_data = 
PhysicalSortExprNodeCollection { - physical_sort_expr_nodes, - }; - - let mut ordering_bytes = ordering_data.encode_to_vec(); - *buffer_size = ordering_bytes.len(); - *buffer_bytes = ordering_bytes.as_mut_ptr(); - std::mem::forget(ordering_bytes); - - 0 -} - -// pub schema: Option FFI_ArrowSchema>, -unsafe extern "C" fn schema_fn_wrapper(properties: *const FFI_PlanProperties) -> FFI_ArrowSchema { - let private_data = (*properties).private_data as *const PlanProperties; - let schema = (*private_data).eq_properties.schema(); - - // This does silently fail because TableProvider does not return a result - // so we expect it to always pass. Maybe some logging should be added. - FFI_ArrowSchema::try_from(schema.as_ref()).unwrap_or(FFI_ArrowSchema::empty()) -} - -impl From for FFI_PlanProperties { - fn from(value: PlanProperties) -> Self { - let private_data = Box::new(value); - - Self { - output_partitioning: Some(output_partitioning_fn_wrapper), - execution_mode: Some(execution_mode_fn_wrapper), - output_ordering: Some(output_ordering_fn_wrapper), - schema: Some(schema_fn_wrapper), - private_data: Box::into_raw(private_data) as *mut c_void, - } - } -} - -// /// Creates a new [`FFI_TableProvider`]. 
-// pub fn new(provider: Box) -> Self { -// let private_data = Box::new(ProviderPrivateData { -// provider, -// last_error: None, -// }); - -// Self { -// version: 2, -// schema: Some(provider_schema), -// scan: Some(provider_scan), -// private_data: Box::into_raw(private_data) as *mut c_void, -// } -// } - -impl TryFrom for PlanProperties { - type Error = DataFusionError; - - fn try_from(value: FFI_PlanProperties) -> std::result::Result { - unsafe { - let schema_fn = value.schema.ok_or(DataFusionError::NotImplemented( - "schema() not implemented on FFI_PlanProperties".to_string(), - ))?; - let ffi_schema = schema_fn(&value); - let schema: Schema = (&ffi_schema).try_into()?; - - let ordering_fn = value - .output_ordering - .ok_or(DataFusionError::NotImplemented( - "output_ordering() not implemented on FFI_PlanProperties".to_string(), - ))?; - let mut buff_size = 0; - let mut buff = null_mut(); - if ordering_fn(&value, &mut buff_size, &mut buff) != 0 { - return Err(DataFusionError::Plan( - "Error occurred during FFI call to output_ordering in FFI_PlanProperties" - .to_string(), - )); - } - - // TODO we will need to get these, but unsure if it happesn on the provider or consumer right now. - let default_ctx = SessionContext::new(); - let codex = DefaultPhysicalExtensionCodec {}; - - let orderings = match buff_size == 0 { - true => None, - false => { - let data = slice::from_raw_parts(buff, buff_size); - - let proto_output_ordering = PhysicalSortExprNodeCollection::decode(data) - .map_err(|e| DataFusionError::External(Box::new(e)))?; - - Some(parse_physical_sort_exprs( - &proto_output_ordering.physical_sort_expr_nodes, - &default_ctx, - &schema, - &codex, - )?) 
- } - }; - - let partitioning_fn = - value - .output_partitioning - .ok_or(DataFusionError::NotImplemented( - "output_partitioning() not implemented on FFI_PlanProperties".to_string(), - ))?; - if partitioning_fn(&value, &mut buff_size, &mut buff) != 0 { - return Err(DataFusionError::Plan( - "Error occurred during FFI call to output_partitioning in FFI_PlanProperties" - .to_string(), - )); - } - let data = slice::from_raw_parts(buff, buff_size); - - let proto_partitioning = - Partitioning::decode(data).map_err(|e| DataFusionError::External(Box::new(e)))?; - // TODO: Validate this unwrap is safe. - let partitioning = parse_protobuf_partitioning( - Some(&proto_partitioning), - &default_ctx, - &schema, - &codex, - )? - .unwrap(); - - let execution_mode_fn = value.execution_mode.ok_or(DataFusionError::NotImplemented( - "execution_mode() not implemented on FFI_PlanProperties".to_string(), - ))?; - let execution_mode = execution_mode_fn(&value).into(); - - let eq_properties = match orderings { - Some(ordering) => { - EquivalenceProperties::new_with_orderings(Arc::new(schema), &[ordering]) - } - None => EquivalenceProperties::new(Arc::new(schema)), - }; - - Ok(Self::new(eq_properties, partitioning, execution_mode)) - } - } - // fn from(value: FFI_PlanProperties) -> Self { - // let schema = self.schema() - - // let equiv_prop = EquivalenceProperties::new_with_orderings(schema, orderings); - // } -} - -#[repr(C)] -#[allow(non_camel_case_types)] -pub enum FFI_ExecutionMode { - Bounded, - - Unbounded, - - PipelineBreaking, -} - -impl From for FFI_ExecutionMode { - fn from(value: ExecutionMode) -> Self { - match value { - ExecutionMode::Bounded => FFI_ExecutionMode::Bounded, - ExecutionMode::Unbounded => FFI_ExecutionMode::Unbounded, - ExecutionMode::PipelineBreaking => FFI_ExecutionMode::PipelineBreaking, - } - } -} - -impl From for ExecutionMode { - fn from(value: FFI_ExecutionMode) -> Self { - match value { - FFI_ExecutionMode::Bounded => ExecutionMode::Bounded, - 
FFI_ExecutionMode::Unbounded => ExecutionMode::Unbounded, - FFI_ExecutionMode::PipelineBreaking => ExecutionMode::PipelineBreaking, - } - } -} diff --git a/src/ffi/mod.rs b/src/ffi/mod.rs deleted file mode 100644 index c28f95fb3..000000000 --- a/src/ffi/mod.rs +++ /dev/null @@ -1,21 +0,0 @@ -pub mod execution_plan; -pub mod record_batch_stream; -pub mod session_config; -pub mod table_provider; - -#[repr(C)] -#[derive(Debug)] -#[allow(non_camel_case_types)] -pub enum FFI_Constraint { - /// Columns with the given indices form a composite primary key (they are - /// jointly unique and not nullable): - PrimaryKey(Vec), - /// Columns with the given indices form a composite unique key: - Unique(Vec), -} - -#[repr(C)] -#[derive(Debug)] -#[allow(missing_docs)] -#[allow(non_camel_case_types)] -pub struct FFI_Expr {} diff --git a/src/ffi/record_batch_stream.rs b/src/ffi/record_batch_stream.rs deleted file mode 100644 index 5e1d4ff2a..000000000 --- a/src/ffi/record_batch_stream.rs +++ /dev/null @@ -1,182 +0,0 @@ -use std::{ - ffi::{c_char, c_int, c_void, CString}, - ptr::addr_of, -}; - -use arrow::{ - array::StructArray, - ffi::{FFI_ArrowArray, FFI_ArrowSchema}, - ffi_stream::FFI_ArrowArrayStream, -}; -use arrow::{ - array::{Array, RecordBatch, RecordBatchReader}, - ffi_stream::ArrowArrayStreamReader, -}; -use datafusion::error::Result; -use datafusion::{ - error::DataFusionError, - execution::{RecordBatchStream, SendableRecordBatchStream}, -}; -use futures::{executor::block_on, Stream, TryStreamExt}; - -pub fn record_batch_to_arrow_stream(stream: SendableRecordBatchStream) -> FFI_ArrowArrayStream { - let private_data = Box::new(RecoredBatchStreamPrivateData { - stream, - last_error: None, - }); - - FFI_ArrowArrayStream { - get_schema: Some(get_schema), - get_next: Some(get_next), - get_last_error: Some(get_last_error), - release: Some(release_stream), - private_data: Box::into_raw(private_data) as *mut c_void, - } -} - -struct RecoredBatchStreamPrivateData { - stream: 
SendableRecordBatchStream, - last_error: Option, -} - -// callback used to drop [FFI_ArrowArrayStream] when it is exported. -unsafe extern "C" fn release_stream(stream: *mut FFI_ArrowArrayStream) { - if stream.is_null() { - return; - } - let stream = &mut *stream; - - stream.get_schema = None; - stream.get_next = None; - stream.get_last_error = None; - - let private_data = Box::from_raw(stream.private_data as *mut RecoredBatchStreamPrivateData); - drop(private_data); - - stream.release = None; -} - -// The callback used to get array schema -unsafe extern "C" fn get_schema( - stream: *mut FFI_ArrowArrayStream, - schema: *mut FFI_ArrowSchema, -) -> c_int { - ExportedRecordBatchStream { stream }.get_schema(schema) -} - -// The callback used to get next array -unsafe extern "C" fn get_next( - stream: *mut FFI_ArrowArrayStream, - array: *mut FFI_ArrowArray, -) -> c_int { - ExportedRecordBatchStream { stream }.get_next(array) -} - -// The callback used to get the error from last operation on the `FFI_ArrowArrayStream` -unsafe extern "C" fn get_last_error(stream: *mut FFI_ArrowArrayStream) -> *const c_char { - let mut ffi_stream = ExportedRecordBatchStream { stream }; - // The consumer should not take ownership of this string, we should return - // a const pointer to it. 
- match ffi_stream.get_last_error() { - Some(err_string) => err_string.as_ptr(), - None => std::ptr::null(), - } -} - -struct ExportedRecordBatchStream { - stream: *mut FFI_ArrowArrayStream, -} - -impl ExportedRecordBatchStream { - fn get_private_data(&mut self) -> &mut RecoredBatchStreamPrivateData { - unsafe { &mut *((*self.stream).private_data as *mut RecoredBatchStreamPrivateData) } - } - - pub fn get_schema(&mut self, out: *mut FFI_ArrowSchema) -> i32 { - let private_data = self.get_private_data(); - let stream = &private_data.stream; - - let schema = FFI_ArrowSchema::try_from(stream.schema().as_ref()); - - match schema { - Ok(schema) => { - unsafe { std::ptr::copy(addr_of!(schema), out, 1) }; - std::mem::forget(schema); - 0 - } - Err(ref err) => { - private_data.last_error = Some( - CString::new(err.to_string()).expect("Error string has a null byte in it."), - ); - 1 - } - } - } - - pub fn get_next(&mut self, out: *mut FFI_ArrowArray) -> i32 { - let private_data = self.get_private_data(); - - let maybe_batch = block_on(private_data.stream.try_next()); - - match maybe_batch { - Ok(None) => { - // Marks ArrowArray released to indicate reaching the end of stream. 
- unsafe { std::ptr::write(out, FFI_ArrowArray::empty()) } - 0 - } - Ok(Some(batch)) => { - let struct_array = StructArray::from(batch); - let array = FFI_ArrowArray::new(&struct_array.to_data()); - - unsafe { std::ptr::write_unaligned(out, array) }; - 0 - } - Err(err) => { - private_data.last_error = Some( - CString::new(err.to_string()).expect("Error string has a null byte in it."), - ); - 1 - } - } - } - - pub fn get_last_error(&mut self) -> Option<&CString> { - self.get_private_data().last_error.as_ref() - } -} - -pub struct ConsumerRecordBatchStream { - reader: ArrowArrayStreamReader, -} - -impl TryFrom for ConsumerRecordBatchStream { - type Error = DataFusionError; - - fn try_from(value: FFI_ArrowArrayStream) -> std::result::Result { - let reader = ArrowArrayStreamReader::try_new(value)?; - - Ok(Self { reader }) - } -} - -impl RecordBatchStream for ConsumerRecordBatchStream { - fn schema(&self) -> arrow::datatypes::SchemaRef { - self.reader.schema() - } -} - -impl Stream for ConsumerRecordBatchStream { - type Item = Result; - - fn poll_next( - mut self: std::pin::Pin<&mut Self>, - _cx: &mut std::task::Context<'_>, - ) -> std::task::Poll> { - let batch = self - .reader - .next() - .map(|v| v.map_err(|e| DataFusionError::ArrowError(e, None))); - - std::task::Poll::Ready(batch) - } -} diff --git a/src/ffi/session_config.rs b/src/ffi/session_config.rs deleted file mode 100644 index 727bf4a77..000000000 --- a/src/ffi/session_config.rs +++ /dev/null @@ -1,49 +0,0 @@ -use std::{ - ffi::{c_void, CString}, - sync::Arc, -}; - -use datafusion::{catalog::Session, prelude::SessionConfig}; - -#[repr(C)] -#[derive(Debug)] -#[allow(missing_docs)] -#[allow(non_camel_case_types)] -pub struct FFI_SessionConfig { - pub version: i64, - - pub private_data: *mut c_void, -} - -unsafe impl Send for FFI_SessionConfig {} - -pub struct SessionConfigPrivateData { - pub config: SessionConfig, - pub last_error: Option, -} - -struct ExportedSessionConfig { - session: *mut FFI_SessionConfig, 
-} - -impl ExportedSessionConfig { - fn get_private_data(&mut self) -> &mut SessionConfigPrivateData { - unsafe { &mut *((*self.session).private_data as *mut SessionConfigPrivateData) } - } -} - -impl FFI_SessionConfig { - /// Creates a new [`FFI_TableProvider`]. - pub fn new(session: &dyn Session) -> Self { - let config = session.config().clone(); - let private_data = Box::new(SessionConfigPrivateData { - config, - last_error: None, - }); - - Self { - version: 2, - private_data: Box::into_raw(private_data) as *mut c_void, - } - } -} diff --git a/src/ffi/table_provider.rs b/src/ffi/table_provider.rs deleted file mode 100644 index 85d15a142..000000000 --- a/src/ffi/table_provider.rs +++ /dev/null @@ -1,297 +0,0 @@ -use std::{ - any::Any, - ffi::{c_char, c_int, c_void, CStr, CString}, - ptr::null_mut, - sync::Arc, -}; - -use arrow::{ - datatypes::{Schema, SchemaRef}, - ffi::FFI_ArrowSchema, -}; -use async_trait::async_trait; -use datafusion::{ - catalog::{Session, TableProvider}, - common::DFSchema, - datasource::TableType, - error::DataFusionError, - execution::session_state::SessionStateBuilder, - logical_expr::TableProviderFilterPushDown, - physical_plan::ExecutionPlan, - prelude::{Expr, SessionContext}, -}; -use tokio::runtime::Runtime; - -use super::{ - execution_plan::{ExportedExecutionPlan, FFI_ExecutionPlan}, - session_config::{FFI_SessionConfig, SessionConfigPrivateData}, -}; -use datafusion::error::Result; - -#[repr(C)] -#[derive(Debug)] -#[allow(missing_docs)] -#[allow(non_camel_case_types)] -pub struct FFI_TableProvider { - pub version: i64, - pub schema: Option FFI_ArrowSchema>, - pub scan: Option< - unsafe extern "C" fn( - provider: *const FFI_TableProvider, - session_config: *const FFI_SessionConfig, - n_projections: c_int, - projections: *const c_int, - n_filters: c_int, - filters: *const *const c_char, - limit: c_int, - err_code: *mut c_int, - ) -> *mut FFI_ExecutionPlan, - >, - pub private_data: *mut c_void, -} - -unsafe impl Send for 
FFI_TableProvider {} -unsafe impl Sync for FFI_TableProvider {} - -struct ProviderPrivateData { - provider: Box, -} - -struct ExportedTableProvider(*const FFI_TableProvider); - -// The callback used to get array schema -unsafe extern "C" fn provider_schema(provider: *const FFI_TableProvider) -> FFI_ArrowSchema { - ExportedTableProvider(provider).provider_schema() -} - -unsafe extern "C" fn scan_fn_wrapper( - provider: *const FFI_TableProvider, - session_config: *const FFI_SessionConfig, - n_projections: c_int, - projections: *const c_int, - n_filters: c_int, - filters: *const *const c_char, - limit: c_int, - err_code: *mut c_int, -) -> *mut FFI_ExecutionPlan { - println!("entered scan_fn_wrapper"); - let config = unsafe { (*session_config).private_data as *const SessionConfigPrivateData }; - let session = SessionStateBuilder::new() - .with_config((*config).config.clone()) - .build(); - let ctx = SessionContext::new_with_state(session); - - let num_projections: usize = n_projections.try_into().unwrap_or(0); - - let projections: Vec = std::slice::from_raw_parts(projections, num_projections) - .iter() - .filter_map(|v| (*v).try_into().ok()) - .collect(); - let maybe_projections = match projections.is_empty() { - true => None, - false => Some(&projections), - }; - - let filters_slice = std::slice::from_raw_parts(filters, n_filters as usize); - let filters_vec: Vec = filters_slice - .iter() - .map(|&s| CStr::from_ptr(s).to_string_lossy().to_string()) - .collect(); - - let limit = limit.try_into().ok(); - - let plan = - ExportedTableProvider(provider).provider_scan(&ctx, maybe_projections, filters_vec, limit); - - println!("leaving scan_fn_wrapper, has plan? 
{}", plan.is_ok()); - - match plan { - Ok(plan) => { - *err_code = 0; - plan - } - Err(_) => { - *err_code = 1; - null_mut() - } - } -} - -impl ExportedTableProvider { - fn get_private_data(&self) -> &ProviderPrivateData { - unsafe { &*((*self.0).private_data as *const ProviderPrivateData) } - } - - pub fn provider_schema(&self) -> FFI_ArrowSchema { - let private_data = self.get_private_data(); - let provider = &private_data.provider; - - // This does silently fail because TableProvider does not return a result - // so we expect it to always pass. Maybe some logging should be added. - FFI_ArrowSchema::try_from(provider.schema().as_ref()).unwrap_or(FFI_ArrowSchema::empty()) - } - - pub fn provider_scan( - &mut self, - ctx: &SessionContext, - projections: Option<&Vec>, - filters: Vec, - limit: Option, - ) -> Result<*mut FFI_ExecutionPlan> { - let private_data = self.get_private_data(); - let provider = &private_data.provider; - - let schema = provider.schema(); - let df_schema: DFSchema = schema.try_into()?; - - let filter_exprs = filters - .into_iter() - .map(|expr_str| ctx.state().create_logical_expr(&expr_str, &df_schema)) - .collect::>>()?; - - let runtime = Runtime::new().unwrap(); - let plan = - runtime.block_on(provider.scan(&ctx.state(), projections, &filter_exprs, limit))?; - - let plan_boxed = Box::new(FFI_ExecutionPlan::new(plan, ctx.task_ctx())); - Ok(Box::into_raw(plan_boxed)) - } -} - -impl FFI_TableProvider { - /// Creates a new [`FFI_TableProvider`]. - pub fn new(provider: Box) -> Self { - let private_data = Box::new(ProviderPrivateData { provider }); - - Self { - version: 2, - schema: Some(provider_schema), - scan: Some(scan_fn_wrapper), - private_data: Box::into_raw(private_data) as *mut c_void, - } - } - - /** - Replace temporary pointer with updated - # Safety - User must validate the raw pointer is valid. 
- */ - pub unsafe fn from_raw(raw_provider: *mut FFI_TableProvider) -> Self { - std::ptr::replace(raw_provider, Self::empty()) - } - - /// Creates a new empty [FFI_ArrowArrayStream]. Used to import from the C Stream Interface. - pub fn empty() -> Self { - Self { - version: 0, - schema: None, - scan: None, - private_data: std::ptr::null_mut(), - } - } -} - -#[async_trait] -impl TableProvider for FFI_TableProvider { - /// Returns the table provider as [`Any`](std::any::Any) so that it can be - /// downcast to a specific implementation. - fn as_any(&self) -> &dyn Any { - self - } - - /// Get a reference to the schema for this table - fn schema(&self) -> SchemaRef { - let schema = match self.schema { - Some(func) => unsafe { Schema::try_from(&func(self)).ok() }, - None => None, - }; - Arc::new(schema.unwrap_or(Schema::empty())) - } - - /// Get the type of this table for metadata/catalog purposes. - fn table_type(&self) -> TableType { - todo!() - } - - /// Create an ExecutionPlan that will scan the table. - /// The table provider will be usually responsible of grouping - /// the source data into partitions that can be efficiently - /// parallelized or distributed. - async fn scan( - &self, - session: &dyn Session, - projection: Option<&Vec>, - filters: &[Expr], - // limit can be used to reduce the amount scanned - // from the datasource as a performance optimization. - // If set, it contains the amount of rows needed by the `LogicalPlan`, - // The datasource should return *at least* this number of rows if available. 
- limit: Option, - ) -> Result> { - let scan_fn = self.scan.ok_or(DataFusionError::NotImplemented( - "Scan not defined on FFI_TableProvider".to_string(), - ))?; - - let session_config = FFI_SessionConfig::new(session); - - let n_projections = projection.map(|p| p.len()).unwrap_or(0) as c_int; - let projections: Vec = projection - .map(|p| p.iter().map(|v| *v as c_int).collect()) - .unwrap_or_default(); - let projections_ptr = projections.as_ptr(); - - let n_filters = filters.len() as c_int; - let filters: Vec = filters - .iter() - .filter_map(|f| CString::new(f.to_string()).ok()) - .collect(); - let filters_ptr: Vec<*const i8> = filters.iter().map(|s| s.as_ptr()).collect(); - - let limit = match limit { - Some(l) => l as c_int, - None => -1, - }; - - println!("Within scan about to call unsafe scan_fn"); - let mut err_code = 0; - let plan = unsafe { - let plan_ptr = scan_fn( - self, - &session_config, - n_projections, - projections_ptr, - n_filters, - filters_ptr.as_ptr(), - limit, - &mut err_code, - ); - - if 0 != err_code { - return Err(datafusion::error::DataFusionError::Internal( - "Unable to perform scan via FFI".to_string(), - )); - } - - println!( - "Finished scan_fn inside FFI_TableProvider::scan {}", - plan_ptr.is_null() - ); - - let p = ExportedExecutionPlan::new(plan_ptr)?; - println!("ExportedExecutionPlan::new returned inside scan()"); - p - }; - println!("Scan returned with some plan."); - - Ok(Arc::new(plan)) - } - - /// Tests whether the table provider can make use of a filter expression - /// to optimise data retrieval. 
- fn supports_filters_pushdown( - &self, - filter: &[&Expr], - ) -> Result> { - todo!() - } -} diff --git a/src/lib.rs b/src/lib.rs index ce737f7a8..0b57e0999 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -61,8 +61,6 @@ mod udf; mod udwf; pub mod utils; -pub mod ffi; - #[cfg(feature = "mimalloc")] #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; From 88afcc78293e14d3c6d36810b10052d134057258 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 14 Oct 2024 11:26:42 -0400 Subject: [PATCH 13/28] Modifications need to compile against latest DF --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f28830843..325a987ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,8 +40,8 @@ arrow = { version = "53", features = ["pyarrow"] } datafusion = { version = "43.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } datafusion-substrait = { version = "43.0.0", optional = true } datafusion-proto = { version = "43.0.0" } -datafusion-functions-window-common = { version = "43.0.0" } datafusion-ffi = { version = "43.0.0" } +datafusion-functions-window-common = { version = "43.0.0" } prost = "0.13" # keep in line with `datafusion-substrait` uuid = { version = "1.11", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } From 5e386e6e1792bdd23c5d7d7c3d40e851159c072f Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 18 Oct 2024 18:26:10 -0400 Subject: [PATCH 14/28] Set DF to 42.0.0 --- src/context.rs | 4 +--- src/udf.rs | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/context.rs b/src/context.rs index 9fcb84845..b92b73452 100644 --- a/src/context.rs +++ b/src/context.rs @@ -581,10 +581,8 @@ impl PySessionContext { let capsule = capsule.downcast::()?; // validate_pycapsule(capsule, "arrow_array_stream")?; - let provider = unsafe { FFI_TableProvider::from_raw(capsule.pointer() as _) }; + let provider = unsafe { 
capsule.reference::() }; let provider = ForeignTableProvider::new(provider); - let schema = provider.schema(); - println!("Got schema through TableProvider trait."); let _ = self.ctx.register_table(name, Arc::new(provider))?; } diff --git a/src/udf.rs b/src/udf.rs index 4570e77a6..21f6d2699 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -97,7 +97,7 @@ impl PyScalarUDF { let function = create_udf( name, input_types.0, - return_type.0, + Arc::new(return_type.0), parse_volatility(volatility)?, to_scalar_function_impl(func), ); From 2764bca934125e698b304d5008ec2ba27f465633 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 1 Nov 2024 09:45:25 -0400 Subject: [PATCH 15/28] Rebasing and pulling in a few changes for DF43.0 --- src/context.rs | 7 ++----- src/udf.rs | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/context.rs b/src/context.rs index b92b73452..547916c43 100644 --- a/src/context.rs +++ b/src/context.rs @@ -21,7 +21,6 @@ use std::str::FromStr; use std::sync::Arc; use arrow::array::RecordBatchReader; -use arrow::ffi::FFI_ArrowSchema; use arrow::ffi_stream::ArrowArrayStreamReader; use arrow::pyarrow::FromPyArrow; use datafusion::execution::session_state::SessionStateBuilder; @@ -37,7 +36,6 @@ use crate::dataframe::PyDataFrame; use crate::dataset::Dataset; use crate::errors::{py_datafusion_err, DataFusionError}; use crate::expr::sort_expr::PySortExpr; -use crate::expr::PyExpr; use crate::physical_plan::PyExecutionPlan; use crate::record_batch::PyRecordBatchStream; use crate::sql::logical::PyLogicalPlan; @@ -56,8 +54,8 @@ use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::listing::{ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, }; +use datafusion::datasource::MemTable; use datafusion::datasource::TableProvider; -use datafusion::datasource::{provider, MemTable}; use datafusion::execution::context::{ DataFilePaths, SQLOptions, SessionConfig, SessionContext, TaskContext, }; @@ -574,7 
+572,6 @@ impl PySessionContext { &mut self, name: &str, provider: Bound<'_, PyAny>, - py: Python, ) -> PyResult<()> { if provider.hasattr("__datafusion_table_provider__")? { let capsule = provider.getattr("__datafusion_table_provider__")?.call0()?; @@ -582,7 +579,7 @@ impl PySessionContext { // validate_pycapsule(capsule, "arrow_array_stream")?; let provider = unsafe { capsule.reference::() }; - let provider = ForeignTableProvider::new(provider); + let provider: ForeignTableProvider = provider.into(); let _ = self.ctx.register_table(name, Arc::new(provider))?; } diff --git a/src/udf.rs b/src/udf.rs index 21f6d2699..4570e77a6 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -97,7 +97,7 @@ impl PyScalarUDF { let function = create_udf( name, input_types.0, - Arc::new(return_type.0), + return_type.0, parse_volatility(volatility)?, to_scalar_function_impl(func), ); From fbf54f1fce16323820853e8fc972825d237c3006 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 11 Nov 2024 07:08:56 -0500 Subject: [PATCH 16/28] Add wrapper for register table provider --- python/datafusion/context.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 5221c866c..a07b5d175 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -685,6 +685,14 @@ def deregister_table(self, name: str) -> None: """Remove a table from the session.""" self.ctx.deregister_table(name) + def register_table_provider(self, name: str, provider: Any) -> None: + """Register a table provider. + + This table provider must have a method called ``__datafusion_table_provider__`` + which returns a PyCapsule that exposes a ``FFI_TableProvider``. 
+ """ + self.ctx.register_table_provider(name, provider) + def register_record_batches( self, name: str, partitions: list[list[pyarrow.RecordBatch]] ) -> None: From 01a15f16359e5077441a3e3d03c00a7916ceab93 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 11 Nov 2024 07:39:51 -0500 Subject: [PATCH 17/28] Suppress deprecation warning --- python/tests/test_dataframe.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 330475302..b82f95e35 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -306,6 +306,7 @@ def test_unnest_without_nulls(nested_df): assert result.column(1) == pa.array([7, 8, 8, 9, 9, 9]) +@pytest.mark.filterwarnings("ignore:`join_keys`:DeprecationWarning") def test_join(): ctx = SessionContext() From 7f5811723e3627e8be5f3edbf5ec86806ea323b1 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 11 Nov 2024 11:58:55 -0500 Subject: [PATCH 18/28] Add example for FFI table provider --- .../ffi-table-provider/.cargo/config.toml | 12 + examples/ffi-table-provider/Cargo.lock | 3175 +++++++++++++++++ examples/ffi-table-provider/Cargo.toml | 19 + examples/ffi-table-provider/build.rs | 20 + examples/ffi-table-provider/pyproject.toml | 16 + .../python/tests/test_table_provider.py | 40 + examples/ffi-table-provider/src/lib.rs | 115 + 7 files changed, 3397 insertions(+) create mode 100644 examples/ffi-table-provider/.cargo/config.toml create mode 100644 examples/ffi-table-provider/Cargo.lock create mode 100644 examples/ffi-table-provider/Cargo.toml create mode 100644 examples/ffi-table-provider/build.rs create mode 100644 examples/ffi-table-provider/pyproject.toml create mode 100644 examples/ffi-table-provider/python/tests/test_table_provider.py create mode 100644 examples/ffi-table-provider/src/lib.rs diff --git a/examples/ffi-table-provider/.cargo/config.toml b/examples/ffi-table-provider/.cargo/config.toml new file mode 100644 index 000000000..91a099a61 --- 
/dev/null +++ b/examples/ffi-table-provider/.cargo/config.toml @@ -0,0 +1,12 @@ +[target.x86_64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] + +[target.aarch64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] + diff --git a/examples/ffi-table-provider/Cargo.lock b/examples/ffi-table-provider/Cargo.lock new file mode 100644 index 000000000..3b57cac75 --- /dev/null +++ b/examples/ffi-table-provider/Cargo.lock @@ -0,0 +1,3175 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "abi_stable" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69d6512d3eb05ffe5004c59c206de7f99c34951504056ce23fc953842f12c445" +dependencies = [ + "abi_stable_derive", + "abi_stable_shared", + "const_panic", + "core_extensions", + "crossbeam-channel", + "generational-arena", + "libloading", + "lock_api", + "parking_lot", + "paste", + "repr_offset", + "rustc_version", + "serde", + "serde_derive", + "serde_json", +] + +[[package]] +name = "abi_stable_derive" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7178468b407a4ee10e881bc7a328a65e739f0863615cca4429d43916b05e898" +dependencies = [ + "abi_stable_shared", + "as_derive_utils", + "core_extensions", + "proc-macro2", + "quote", + "rustc_version", + "syn 1.0.109", + "typed-arena", +] + +[[package]] +name = "abi_stable_shared" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2b5df7688c123e63f4d4d649cba63f2967ba7f7861b1664fca3f77d3dad2b63" +dependencies = [ + "core_extensions", +] + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + 
+[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "const-random", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "allocator-api2" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" + +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "arrow" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4caf25cdc4a985f91df42ed9e9308e1adbcd341a31a72605c697033fcef163e3" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91f2dfd1a7ec0aca967dfaa616096aec49779adc8eccec005e2f5e4111b1192a" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "num", +] + +[[package]] +name = "arrow-array" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d39387ca628be747394890a6e47f138ceac1aa912eab64f02519fed24b637af8" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.14.5", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e51e05228852ffe3eb391ce7178a0f97d2cf80cc6ef91d3c4a6b3cb688049ec" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d09aea56ec9fa267f3f3f6cdab67d8a9974cbba90b3aa38c8fe9d0bb071bd8c1" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-csv" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c07b5232be87d115fde73e32f2ca7f1b353bff1b44ac422d3c6fc6ae38f11f0d" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "lazy_static", + "lexical-core", + "regex", +] + +[[package]] +name = "arrow-data" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98ae0af50890b494cebd7d6b04b35e896205c1d1df7b29a6272c5d0d0249ef5" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ed91bdeaff5a1c00d28d8f73466bcb64d32bbd7093b5a30156b4b9f4dba3eee" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "flatbuffers", + "lz4_flex", +] + +[[package]] +name = "arrow-json" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0471f51260a5309307e5d409c9dc70aede1cd9cf1d4ff0f0a1e8e1a2dd0e0d3c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap", + "lexical-core", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-ord" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2883d7035e0b600fb4c30ce1e50e66e53d8656aa729f2bfa4b51d359cf3ded52" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", + "num", +] + 
+[[package]] +name = "arrow-row" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "552907e8e587a6fde4f8843fd7a27a576a260f65dab6c065741ea79f633fc5be" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", +] + +[[package]] +name = "arrow-schema" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "539ada65246b949bd99ffa0881a9a15a4a529448af1a07a9838dd78617dafab1" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "arrow-select" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6259e566b752da6dceab91766ed8b2e67bf6270eb9ad8a6e07a33c1bede2b125" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3179ccbd18ebf04277a095ba7321b93fd1f774f18816bd5f6b3ce2f594edb6c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "as_derive_utils" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff3c96645900a44cf11941c111bd08a6573b0e2f9f69bc9264b179d8fae753c4" +dependencies = [ + "core_extensions", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "async-compression" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cb8f1d480b0ea3783ab015936d2a55c87e219676f0c0b7dec61494043f21857" +dependencies = [ + "bzip2", + "flate2", + "futures-core", + "futures-io", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd", + "zstd-safe", +] + +[[package]] +name = "async-ffi" +version = "0.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4de21c0feef7e5a556e51af767c953f0501f7f300ba785cc99c47bdc8081a50" +dependencies = [ + "abi_stable", +] + +[[package]] +name = "async-trait" +version = "0.1.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies 
= [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "brotli" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "4.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" + +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "cc" +version = "1.1.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40545c26d092346d8a8dab71ee48e7685a7a9cba76e634790c215b41a4a7b4cf" +dependencies = [ + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "windows-targets", +] + +[[package]] +name = "chrono-tz" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd6dd8046d00723a59a2f8c5f295c515b9bb9a331ee4f8f3d4dd49e428acd3b6" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf", +] + +[[package]] +name = "chrono-tz-build" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7" +dependencies = [ + "parse-zoneinfo", + "phf_codegen", +] + +[[package]] +name = "comfy-table" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +dependencies = [ + "strum", + "strum_macros", + "unicode-width", +] + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] 
+name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "const_panic" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "013b6c2c3a14d678f38cd23994b02da3a1a1b6a5d1eedddfe63a5a5f11b13a81" + +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "core_extensions" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92c71dc07c9721607e7a16108336048ee978c3a8b129294534272e8bac96c0ee" +dependencies = [ + "core_extensions_proc_macros", +] + +[[package]] +name = "core_extensions_proc_macros" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f3b219d28b6e3b4ac87bc1fc522e0803ab22e055da177bff0068c4150c61a6" + +[[package]] +name = "cpufeatures" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "datafusion" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-ipc", + "arrow-schema", + "async-compression", + "async-trait", + "bytes", + "bzip2", + "chrono", + "dashmap", + "datafusion-catalog", + 
"datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", + "datafusion-functions-window", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-sql", + "flate2", + "futures", + "glob", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "num_cpus", + "object_store", + "parking_lot", + "parquet", + "paste", + "pin-project-lite", + "rand", + "sqlparser", + "tempfile", + "tokio", + "tokio-util", + "url", + "uuid", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-catalog" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" +dependencies = [ + "arrow-schema", + "async-trait", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", +] + +[[package]] +name = "datafusion-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.14.5", + "indexmap", + "instant", + "libc", + "num_cpus", + "object_store", + "parquet", + "paste", + "sqlparser", + "tokio", +] + +[[package]] +name = "datafusion-common-runtime" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" +dependencies = [ + "log", + "tokio", +] + +[[package]] +name = "datafusion-execution" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" +dependencies = [ + "arrow", + "chrono", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "hashbrown 0.14.5", + "log", + "object_store", + "parking_lot", + "rand", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "chrono", + "datafusion-common", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", + "indexmap", + "paste", + "serde_json", + "sqlparser", + "strum", + "strum_macros", +] + +[[package]] +name = "datafusion-expr-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" +dependencies = [ + "arrow", + "datafusion-common", + "itertools", + "paste", +] + +[[package]] +name = "datafusion-ffi" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e923c459b53a26d92a8806d1f6a37fdf48bde51507a39eaed6f42a60f2bfd160" +dependencies = [ + "abi_stable", + "arrow", + "async-ffi", + "async-trait", + "datafusion", + "datafusion-proto", + "doc-comment", + "futures", + "log", + "prost", +] + +[[package]] +name = "datafusion-functions" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" +dependencies = [ + "arrow", + "arrow-buffer", + "base64", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "hashbrown 0.14.5", + "hex", + "itertools", + "log", + "md-5", + "rand", + "regex", + "sha2", + "unicode-segmentation", + 
"uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" +dependencies = [ + "ahash", + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "indexmap", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", + "rand", +] + +[[package]] +name = "datafusion-functions-nested" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-physical-expr-common", + "itertools", + "log", + "paste", + "rand", +] + +[[package]] +name = "datafusion-functions-window" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" +dependencies = [ + "datafusion-common", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-optimizer" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "paste", + "regex-syntax", +] + +[[package]] +name = "datafusion-physical-expr" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "arrow-string", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "paste", + "petgraph", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "hashbrown 0.14.5", + "rand", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-plan", + "itertools", +] + +[[package]] +name = 
"datafusion-physical-plan" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "once_cell", + "parking_lot", + "pin-project-lite", + "rand", + "tokio", +] + +[[package]] +name = "datafusion-proto" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f730f7fc5a20134d4e5ecdf7bbf392002ac58163d58423ea28a702dc077b06e1" +dependencies = [ + "arrow", + "chrono", + "datafusion", + "datafusion-common", + "datafusion-expr", + "datafusion-proto-common", + "object_store", + "prost", +] + +[[package]] +name = "datafusion-proto-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12c225fe49e4f943e35446b263613ada7a9e9f8d647544e6b07037b9803567df" +dependencies = [ + "arrow", + "chrono", + "datafusion-common", + "object_store", + "prost", +] + +[[package]] +name = "datafusion-sql" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "indexmap", + "log", + "regex", + "sqlparser", + "strum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" 
+dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fastrand" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" + +[[package]] +name = "ffi-table-provider" +version = "0.1.0" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "datafusion", + "datafusion-ffi", + "pyo3", + "pyo3-build-config", +] + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "flatbuffers" +version = "24.3.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8add37afff2d4ffa83bc748a70b4b1370984f6980768554182424ef71447c35f" +dependencies = 
[ + "bitflags 1.3.2", + "rustc_version", +] + +[[package]] +name = "flate2" +version = "1.0.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generational-arena" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877e94aff08e743b651baaea359664321055749b398adff8740a7399af7796e7" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashbrown" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "iana-time-zone" +version = "0.1.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +dependencies = [ + "equivalent", + "hashbrown 0.15.1", +] + +[[package]] +name = "indoc" +version = "2.0.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" + +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lexical-core" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + 
"lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.162" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" + +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + +[[package]] +name = "libm" +version = "0.2.11" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "litemap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "lz4_flex" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "memoffset" +version = "0.9.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] 
+name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "object" +version = "0.36.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +dependencies = [ + "memchr", +] + +[[package]] +name = "object_store" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eb4c22c6154a1e759d7099f9ffad7cc5ef8245f9efbab4a41b92623079c82f3" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "humantime", + "itertools", + "parking_lot", + "percent-encoding", + "snafu", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "parquet" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dea02606ba6f5e856561d8d507dba8bac060aefca2a6c0f1aa1d361fed91ff3e" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.14.5", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd", + "zstd-sys", +] + +[[package]] +name = "parse-zoneinfo" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" +dependencies = [ + "regex", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" + +[[package]] +name = "portable-atomic" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2" + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "prost" 
+version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "pyo3" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn 2.0.87", +] + 
+[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "redox_syscall" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] 
+name = "repr_offset" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb1070755bd29dffc19d0971cab794e607839ba2ef4b69a9e6fbc8733c1b72ea" +dependencies = [ + "tstr", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "0.38.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99e4ea3e1cdc4b559b8e5650f9c8e5998e3e5c1343b4eaf034565f32318d63c0" +dependencies = [ + "bitflags 2.6.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustversion" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" + 
+[[package]] +name = "seq-macro" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" + +[[package]] +name = "serde" +version = "1.0.214" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.214" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "serde_json" +version = "1.0.132" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "snafu" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019" +dependencies = [ + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + +[[package]] +name = "sqlparser" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" +dependencies = [ + "log", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = 
"0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.87", +] + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + +[[package]] +name = "tempfile" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" +dependencies = [ + "cfg-if", + "fastrand", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + 
"ordered-float", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tokio" +version = "1.41.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" +dependencies = [ + "backtrace", + "bytes", + "pin-project-lite", + "tokio-macros", +] + +[[package]] +name = "tokio-macros" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "tokio-util" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", +] + +[[package]] +name = "tstr" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f8e0294f14baae476d0dd0a2d780b2e24d66e349a9de876f5126777a37bdba7" +dependencies = [ + "tstr_proc_macros", +] + +[[package]] +name = "tstr_proc_macros" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78122066b0cb818b8afd08f7ed22f7fdbc3e90815035726f0840d0d26c0747a" + +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + +[[package]] +name = "typed-arena" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "unindent" +version = "0.2.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" + +[[package]] +name = "url" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "uuid" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +dependencies = [ + "getrandom", +] + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name 
= "wasm-bindgen-backend" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.87", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" + +[[package]] +name = "web-sys" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + 
+[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + +[[package]] +name = "yoke" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", +] + +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + 
"proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "zstd" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.13+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/examples/ffi-table-provider/Cargo.toml b/examples/ffi-table-provider/Cargo.toml new file mode 100644 index 000000000..10b8a0922 --- /dev/null +++ b/examples/ffi-table-provider/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "ffi-table-provider" +version = "0.1.0" +edition = "2021" + +[dependencies] +datafusion = { version = "43.0.0" } +datafusion-ffi = { version = "43.0.0" } +pyo3 = { version = "0.22.6", features = ["extension-module", "abi3", "abi3-py38"] } +arrow = { version = "53.2.0" } +arrow-array = { version = "53.2.0" } +arrow-schema = { version = "53.2.0" } + +[build-dependencies] +pyo3-build-config = "0.22.6" + +[lib] +name = "ffi_table_provider" +crate-type = ["cdylib", "rlib"] diff --git a/examples/ffi-table-provider/build.rs b/examples/ffi-table-provider/build.rs new file mode 100644 index 000000000..4878d8b0e --- /dev/null +++ b/examples/ffi-table-provider/build.rs @@ -0,0 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +fn main() { + pyo3_build_config::add_extension_module_link_args(); +} diff --git a/examples/ffi-table-provider/pyproject.toml b/examples/ffi-table-provider/pyproject.toml new file mode 100644 index 000000000..7202c5c6d --- /dev/null +++ b/examples/ffi-table-provider/pyproject.toml @@ -0,0 +1,16 @@ +[build-system] +requires = ["maturin>=1.6,<2.0"] +build-backend = "maturin" + +[project] +name = "ffi_table_provider" +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dynamic = ["version"] + +[tool.maturin] +features = ["pyo3/extension-module"] diff --git a/examples/ffi-table-provider/python/tests/test_table_provider.py b/examples/ffi-table-provider/python/tests/test_table_provider.py new file mode 100644 index 000000000..56c05e4fa --- /dev/null +++ b/examples/ffi-table-provider/python/tests/test_table_provider.py @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datafusion import SessionContext +from ffi_table_provider import MyTableProvider +import pyarrow as pa + + +def test_table_loading(): + ctx = SessionContext() + table = MyTableProvider(3, 2, 4) + ctx.register_table_provider("t", table) + result = ctx.table("t").collect() + + assert len(result) == 4 + assert result[0].num_columns == 3 + + result = [r.column(0) for r in result] + expected = [ + pa.array([0, 1], type=pa.int32()), + pa.array([2, 3, 4], type=pa.int32()), + pa.array([4, 5, 6, 7], type=pa.int32()), + pa.array([6, 7, 8, 9, 10], type=pa.int32()), + ] + + assert result == expected diff --git a/examples/ffi-table-provider/src/lib.rs b/examples/ffi-table-provider/src/lib.rs new file mode 100644 index 000000000..473244d88 --- /dev/null +++ b/examples/ffi-table-provider/src/lib.rs @@ -0,0 +1,115 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ffi::CString, sync::Arc}; + +use arrow_array::ArrayRef; +use datafusion::{ + arrow::{ + array::RecordBatch, + datatypes::{DataType, Field, Schema}, + }, + datasource::MemTable, + error::{DataFusionError, Result}, +}; +use datafusion_ffi::table_provider::FFI_TableProvider; +use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyCapsule}; + +/// In order to provide a test that demonstrates different sized record batches, +/// the first batch will have num_rows, the second batch num_rows+1, and so on. +#[pyclass(name = "MyTableProvider", module = "ffi_table_provider", subclass)] +#[derive(Clone)] +struct MyTableProvider { + num_cols: usize, + num_rows: usize, + num_batches: usize, +} + +fn create_record_batch( + schema: &Arc, + num_cols: usize, + start_value: i32, + num_values: usize, +) -> Result { + let end_value = start_value + num_values as i32; + let row_values: Vec = (start_value..end_value).collect(); + + let columns: Vec<_> = (0..num_cols) + .map(|_| { + std::sync::Arc::new(arrow::array::Int32Array::from(row_values.clone())) as ArrayRef + }) + .collect(); + + RecordBatch::try_new(Arc::clone(schema), columns).map_err(DataFusionError::from) +} + +impl MyTableProvider { + fn create_table(&self) -> Result { + let fields: Vec<_> = (0..self.num_cols) + .map(|idx| (b'A' + idx as u8) as char) + .map(|col_name| Field::new(col_name, DataType::Int32, true)) + .collect(); + + let schema = Arc::new(Schema::new(fields)); + + let batches: Result> = (0..self.num_batches) + .map(|batch_idx| { + let start_value = batch_idx * self.num_rows; + create_record_batch( + &schema, + self.num_cols, + start_value as i32, + self.num_rows + batch_idx, + ) + }) + .collect(); + + MemTable::try_new(schema, vec![batches?]) + } +} + +#[pymethods] +impl MyTableProvider { + #[new] + fn new(num_cols: usize, num_rows: usize, num_batches: usize) -> Self { + Self { + num_cols, 
+ num_rows, + num_batches, + } + } + + fn __datafusion_table_provider__<'py>( + &self, + py: Python<'py>, + ) -> PyResult> { + let name = CString::new("datafusion_table_provider").unwrap(); + + let provider = self + .create_table() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + let provider = FFI_TableProvider::new(Arc::new(provider), false); + + PyCapsule::new_bound(py, provider, Some(name.clone())) + } +} + +#[pymodule] +fn ffi_table_provider(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + Ok(()) +} From cee25b8639336e42fd276bb7fcfd9ac457150e15 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 11 Nov 2024 12:02:27 -0500 Subject: [PATCH 19/28] Add pytest for FFI module to CI --- .github/workflows/test.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f9383db5f..d57a930fb 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -110,6 +110,12 @@ jobs: pip install -e . -vv pytest -v . + - name: Test FFI module + run: | + cd examples/ffi-table-provider + maturin build --release --strip + pytest + - name: Cache the generated dataset id: cache-tpch-dataset uses: actions/cache@v4 From a28455441ffc7161468a4eb5ea5adf432e930cee Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 11 Nov 2024 12:05:53 -0500 Subject: [PATCH 20/28] Add license text --- examples/ffi-table-provider/Cargo.toml | 17 +++++++++++++++++ examples/ffi-table-provider/pyproject.toml | 17 +++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/examples/ffi-table-provider/Cargo.toml b/examples/ffi-table-provider/Cargo.toml index 10b8a0922..4e54eaf03 100644 --- a/examples/ffi-table-provider/Cargo.toml +++ b/examples/ffi-table-provider/Cargo.toml @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + [package] name = "ffi-table-provider" version = "0.1.0" diff --git a/examples/ffi-table-provider/pyproject.toml b/examples/ffi-table-provider/pyproject.toml index 7202c5c6d..116efae9c 100644 --- a/examples/ffi-table-provider/pyproject.toml +++ b/examples/ffi-table-provider/pyproject.toml @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ [build-system] requires = ["maturin>=1.6,<2.0"] build-backend = "maturin" From 5551ef9faf0349859686b07a740f865be08809a2 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 11 Nov 2024 12:56:45 -0500 Subject: [PATCH 21/28] Change the name of the FFI table provider test so it doesn't try to run during the first pass of pytest when the module hasn't been built --- .github/workflows/test.yaml | 2 +- .../tests/{test_table_provider.py => _test_table_provider.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename examples/ffi-table-provider/python/tests/{test_table_provider.py => _test_table_provider.py} (100%) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index d57a930fb..21adbcf4f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -114,7 +114,7 @@ jobs: run: | cd examples/ffi-table-provider maturin build --release --strip - pytest + pytest _test_table_provider.py - name: Cache the generated dataset id: cache-tpch-dataset diff --git a/examples/ffi-table-provider/python/tests/test_table_provider.py b/examples/ffi-table-provider/python/tests/_test_table_provider.py similarity index 100% rename from examples/ffi-table-provider/python/tests/test_table_provider.py rename to examples/ffi-table-provider/python/tests/_test_table_provider.py From 50b4bf2469f67d1910345004f34f92664a0ecf92 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 11 Nov 2024 13:22:01 -0500 Subject: [PATCH 22/28] Build example in build stage to be used during test stage --- .github/workflows/build.yml | 5 ++++- .github/workflows/test.yaml | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f52913ce8..0209f66d2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -156,7 +156,10 @@ jobs: repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Build Python package - run: maturin build --release --strip --features substrait + run: | + maturin build 
--release --strip --features substrait + cd examples/ffi-table-provider + maturin build --release --strip - name: List Mac wheels run: find target/wheels/ diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 21adbcf4f..f3d3b843a 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -113,7 +113,6 @@ jobs: - name: Test FFI module run: | cd examples/ffi-table-provider - maturin build --release --strip pytest _test_table_provider.py - name: Cache the generated dataset From 468ef5ba27f5deac6e45edc6de27237e63c649dc Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 11 Nov 2024 14:19:39 -0500 Subject: [PATCH 23/28] Combine pytests into one stage --- .github/workflows/test.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f3d3b843a..07ab97400 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -109,9 +109,6 @@ jobs: source venv/bin/activate pip install -e . -vv pytest -v . - - - name: Test FFI module - run: | cd examples/ffi-table-provider pytest _test_table_provider.py From a0a39fd6dffdcc82e7d1f4f340c29193c3c2d4f0 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 11 Nov 2024 14:52:26 -0500 Subject: [PATCH 24/28] Fix path for unit test --- .github/workflows/test.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 07ab97400..124c34e3e 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -109,8 +109,7 @@ jobs: source venv/bin/activate pip install -e . -vv pytest -v . 
- cd examples/ffi-table-provider - pytest _test_table_provider.py + pytest examples/ffi-table-provider/python/tests/_test_table_provider.py - name: Cache the generated dataset id: cache-tpch-dataset From 9529beae7a196816205c162a0e1215aa699485d8 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 11 Nov 2024 16:01:52 -0500 Subject: [PATCH 25/28] Installing maturin for ffi test in test script --- .github/workflows/build.yml | 3 +-- .github/workflows/test.yaml | 10 +++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0209f66d2..084a96192 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -158,8 +158,7 @@ jobs: - name: Build Python package run: | maturin build --release --strip --features substrait - cd examples/ffi-table-provider - maturin build --release --strip + - name: List Mac wheels run: find target/wheels/ diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 124c34e3e..d167e57b5 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -109,7 +109,15 @@ jobs: source venv/bin/activate pip install -e . -vv pytest -v . - pytest examples/ffi-table-provider/python/tests/_test_table_provider.py + + - name: FFI unit tests + run: | + source venv/bin/activate + pip install -e . 
-vv + pip install maturin==1.5.1 + cd examples/ffi-table-provider + maturin build --release --strip + pytest python/tests/_test_table_provider.py - name: Cache the generated dataset id: cache-tpch-dataset From 15458c7bd820c05f0ca77f32e85ca7e5ee393f65 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 11 Nov 2024 16:36:06 -0500 Subject: [PATCH 26/28] Need to install the wheel for unit test --- .github/workflows/test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index d167e57b5..21faedecd 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -116,7 +116,7 @@ jobs: pip install -e . -vv pip install maturin==1.5.1 cd examples/ffi-table-provider - maturin build --release --strip + maturin develop --release --strip pytest python/tests/_test_table_provider.py - name: Cache the generated dataset From 64dcbd3cb0c7d1628d67d8cfc35207decb1d9e52 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Tue, 12 Nov 2024 05:25:32 -0500 Subject: [PATCH 27/28] Add online documentation about using custom table providers --- docs/source/user-guide/io/index.rst | 1 + docs/source/user-guide/io/table_provider.rst | 56 ++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 docs/source/user-guide/io/table_provider.rst diff --git a/docs/source/user-guide/io/index.rst b/docs/source/user-guide/io/index.rst index 05411327e..b885cfeda 100644 --- a/docs/source/user-guide/io/index.rst +++ b/docs/source/user-guide/io/index.rst @@ -26,3 +26,4 @@ IO csv json parquet + table_provider diff --git a/docs/source/user-guide/io/table_provider.rst b/docs/source/user-guide/io/table_provider.rst new file mode 100644 index 000000000..2ff9ae46f --- /dev/null +++ b/docs/source/user-guide/io/table_provider.rst @@ -0,0 +1,56 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. 
distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Custom Table Provider +===================== + +If you have a custom data source that you want to integrate with DataFusion, you can do so by +implementing the `TableProvider `_ +interface in Rust and then exposing it in Python. To do so, +you must use DataFusion 43.0.0 or later and expose a `FFI_TableProvider `_ +via `PyCapsule `_. + +A complete example can be found in the `examples folder `_. + +.. code-block:: rust + + #[pymethods] + impl MyTableProvider { + + fn __datafusion_table_provider__<'py>( + &self, + py: Python<'py>, + ) -> PyResult<Bound<'py, PyCapsule>> { + let name = CString::new("datafusion_table_provider").unwrap(); + + let provider = self.create_table() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + let provider = FFI_TableProvider::new(Arc::new(provider), false); + + PyCapsule::new_bound(py, provider, Some(name.clone())) + } + } + +Once you have this library available, in Python you can register your table provider +to the ``SessionContext``. + +..
code-block:: python + + provider = MyTableProvider() + ctx.register_table_provider("my_table", provider) + + ctx.table("my_table").show() From e31e478616e3d1295b544778fcaef0ec7d68ee8a Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Tue, 12 Nov 2024 05:32:00 -0500 Subject: [PATCH 28/28] Raise an error if method is not implemented when it is expected --- src/context.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/context.rs b/src/context.rs index 547916c43..8675e97df 100644 --- a/src/context.rs +++ b/src/context.rs @@ -28,7 +28,7 @@ use object_store::ObjectStore; use url::Url; use uuid::Uuid; -use pyo3::exceptions::{PyKeyError, PyTypeError, PyValueError}; +use pyo3::exceptions::{PyKeyError, PyNotImplementedError, PyTypeError, PyValueError}; use pyo3::prelude::*; use crate::catalog::{PyCatalog, PyTable}; @@ -582,8 +582,13 @@ impl PySessionContext { let provider: ForeignTableProvider = provider.into(); let _ = self.ctx.register_table(name, Arc::new(provider))?; + + Ok(()) + } else { + Err(PyNotImplementedError::new_err( + "__datafusion_table_provider__ does not exist on Table Provider object.", + )) } - Ok(()) } pub fn register_record_batches(