From 91e75d7e6303c1a7331e8e90eaad9b095ace929b Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Tue, 6 Jun 2023 14:41:26 +0100 Subject: [PATCH] Update to object_store 0.6 and arrow 41 (#6374) * Update object_store 0.6 and arrow * Remove pin * Fix pyarrow * Tomlfmt * Update flight_sql_service * Fix parquet_sql_multiple_files --- Cargo.toml | 12 +- benchmarks/src/bin/parquet.rs | 2 +- datafusion-cli/Cargo.lock | 143 +++++++++--------- datafusion-cli/Cargo.toml | 6 +- datafusion-cli/src/exec.rs | 9 +- datafusion-cli/src/object_storage.rs | 59 +++++++- datafusion-examples/Cargo.toml | 2 +- .../examples/flight_sql_server.rs | 93 +++++++++++- .../examples/parquet_sql_multiple_files.rs | 2 +- datafusion/common/Cargo.toml | 4 +- datafusion/common/src/pyarrow.rs | 12 +- datafusion/core/Cargo.toml | 2 +- .../core/src/datasource/file_format/csv.rs | 1 + .../core/src/datasource/file_format/mod.rs | 8 +- .../src/datasource/file_format/parquet.rs | 17 +-- datafusion/core/src/datasource/listing/mod.rs | 2 + .../core/src/datasource/listing/table.rs | 1 + .../file_format/chunked_store.rs | 6 +- .../core/src/physical_plan/file_format/mod.rs | 1 + .../src/physical_plan/file_format/parquet.rs | 1 + datafusion/core/src/test/object_store.rs | 1 + datafusion/core/src/test_util/parquet.rs | 1 + .../core/tests/parquet/custom_reader.rs | 1 + datafusion/core/tests/parquet/page_pruning.rs | 1 + .../core/tests/parquet/schema_coercion.rs | 1 + datafusion/core/tests/path_partition.rs | 11 +- datafusion/execution/Cargo.toml | 2 +- datafusion/proto/Cargo.toml | 2 +- .../proto/src/physical_plan/from_proto.rs | 1 + datafusion/substrait/Cargo.toml | 2 +- .../substrait/src/physical_plan/consumer.rs | 1 + 31 files changed, 288 insertions(+), 119 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a2885d6cfce4..6b24a44e9ad7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,12 +45,12 @@ repository = "https://github.com/apache/arrow-datafusion" rust-version = "1.64" [workspace.dependencies] -arrow = { version = "40.0.0", features = ["prettyprint"] } -arrow-flight = { version = "40.0.0", features = ["flight-sql-experimental"] } -arrow-buffer = { version = "40.0.0", default-features = false } -arrow-schema = { version = "40.0.0", default-features = false } -arrow-array = { version = "40.0.0", default-features = false, features = ["chrono-tz"] } -parquet = { version = "40.0.0", features = ["arrow", "async", "object_store"] } +arrow = { version = "41.0.0", features = ["prettyprint"] } +arrow-flight = { version = "41.0.0", features = ["flight-sql-experimental"] } +arrow-buffer = { version = "41.0.0", default-features = false } +arrow-schema = { version = "41.0.0", default-features = false } +arrow-array = { version = "41.0.0", default-features = false, features = ["chrono-tz"] } +parquet = { version = "41.0.0", features = ["arrow", "async", "object_store"] } [profile.release] codegen-units = 1 diff --git a/benchmarks/src/bin/parquet.rs b/benchmarks/src/bin/parquet.rs index 589f967a6df0..98b2da7c2fcd 100644 --- a/benchmarks/src/bin/parquet.rs +++ b/benchmarks/src/bin/parquet.rs @@ -87,7 +87,7 @@ impl Opt { if let Some(s) = self.page_size { props_builder = props_builder - .set_data_pagesize_limit(s) + .set_data_page_size_limit(s) .set_write_batch_size(s); } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index a3b1de63c7ff..4fff824c36ad 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -74,9 +74,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow" -version = "40.0.0" +version = "41.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6619cab21a0cdd8c9b9f1d9e09bfaa9b1974e5ef809a6566aef0b998caf38ace" +checksum = "4a46441ae78c0c5915f62aa32cad9910647c19241456dd24039646dd96d494a5" dependencies = [ "ahash", "arrow-arith", @@ -96,9 +96,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "40.0.0" +version = "41.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0dc95485623a76e00929bda8caa40c1f838190952365c4f43a7b9ae86d03e94" +checksum = "350c5067470aeeb38dcfcc1f7e9c397098116409c9087e43ca99c231020635d9" dependencies = [ "arrow-array", "arrow-buffer", @@ -111,9 +111,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "40.0.0" +version = "41.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3267847f53d3042473cfd2c769afd8d74a6d7d201fc3a34f5cb84c0282ef47a7" +checksum = "6049e031521c4e7789b7530ea5991112c0a375430094191f3b74bdf37517c9a9" dependencies = [ "ahash", "arrow-buffer", @@ -128,9 +128,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "40.0.0" +version = "41.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f66553e66e120ac4b21570368ee9ebf35ff3f5399f872b0667699e145678f5" +checksum = "a83450b94b9fe018b65ba268415aaab78757636f68b7f37b6bc1f2a3888af0a0" dependencies = [ "half", "num", @@ -138,9 +138,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "40.0.0" +version = "41.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65e6f3579dbf0d97c683d451b2550062b0f0e62a3169bf74238b5f59f44ad6d8" +checksum = "249198411254530414805f77e88e1587b0914735ea180f906506905721f7a44a" dependencies = [ "arrow-array", "arrow-buffer", @@ -155,9 +155,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "40.0.0" +version = "41.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "373579c4c1a8f5307d3125b7a89c700fcf8caf85821c77eb4baab3855ae0aba5" +checksum = "ec9ee134298aa895ef9d791dc9cc557cecd839108843830bd35824fcd8d7f721" dependencies = [ "arrow-array", "arrow-buffer", @@ -174,9 +174,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "40.0.0" +version = "41.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61bc8df9912cca6642665fdf989d6fa0de2570f18a7f709bcf59d29de96d2097" +checksum = "4d48dcbed83d741d4af712af17f6d952972b8f6491b24ee2415243a7e37c6438" dependencies = [ "arrow-buffer", "arrow-schema", @@ -186,9 +186,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "40.0.0" +version = "41.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0105dcf5f91daa7182d87b713ee0b32b3bfc88e0c48e7dc3e9d6f1277a07d1ae" +checksum = "ea8d7b138c5414aeef5dd08abacf362f87ed9b1168ea38d60a6f67590c3f7d99" dependencies = [ "arrow-array", "arrow-buffer", @@ -200,9 +200,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "40.0.0" +version = "41.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e73134fb5b5ec8770f8cbb214c2c487b2d350081e403ca4eeeb6f8f5e19846ac" +checksum = "a3a597fdca885a81f2e7ab0bacaa0bd2dfefb4cd6a2e5a3d1677396a68673101" dependencies = [ "arrow-array", "arrow-buffer", @@ -220,9 +220,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "40.0.0" +version = "41.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89f25bc66e18d4c2aa1fe2f9bb03e2269da60e636213210385ae41a107f9965a" +checksum = "29be2d5fadaab29e4fa6a7e527ceaa1c2cddc57dc6d86c062f7a05adcd8df71e" dependencies = [ "arrow-array", "arrow-buffer", @@ -235,9 +235,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "40.0.0" +version = "41.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1095ff85ea4f5ff02d17b30b089de31b51a50be01c6b674f0a0509ab771232f1" +checksum = "b6e0bd6ad24d56679b3317b499b0de61bca16d3142896908cce1aa943e56e981" dependencies = [ "ahash", "arrow-array", @@ -250,15 +250,15 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "40.0.0" +version = "41.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25187bbef474151a2e4ddec67b9e34bda5cbfba292dc571392fa3a1f71ff5a82" +checksum = "2b71d8d68d0bc2e648e4e395896dc518be8b90c5f0f763c59083187c3d46184b" [[package]] name = "arrow-select" -version = "40.0.0" +version = "41.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd0d4ee884aec3aa05e41478e3cd312bf609de9babb5d187a43fb45931da4da4" +checksum = "470cb8610bdfda56554a436febd4e457e506f3c42e01e545a1ea7ecf2a4c8823" dependencies = [ "arrow-array", "arrow-buffer", @@ -269,9 +269,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "40.0.0" +version = "41.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6d71c3ffe4c07e66ce8fdc6aed5b00e0e60c5144911879b10546f5b72d8fa1c" +checksum = "70f8a2e4ff9dbbd51adbabf92098b71e3eb2ef0cfcb75236ca7c3ce087cce038" dependencies = [ "arrow-array", "arrow-buffer", @@ -330,9 +330,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "aws-config" -version = "0.54.1" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c3d1e2a1f1ab3ac6c4b884e37413eaa03eb9d901e4fc68ee8f5c1d49721680e" +checksum = "bcdcf0d683fe9c23d32cf5b53c9918ea0a500375a9fb20109802552658e576c9" dependencies = [ "aws-credential-types", "aws-http", @@ -346,6 +346,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", + "fastrand", "hex", "http", "hyper", @@ -359,12 +360,13 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "0.54.1" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb0696a0523a39a19087747e4dafda0362dc867531e3d72a3f195564c84e5e08" +checksum = "1fcdb2f7acbc076ff5ad05e7864bdb191ca70a6fd07668dc3a1a8bcd051de5ae" dependencies = [ "aws-smithy-async", "aws-smithy-types", + "fastrand", "tokio", "tracing", "zeroize", @@ -372,9 +374,9 @@ dependencies = [ [[package]] name = "aws-endpoint" -version = "0.54.1" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80a4f935ab6a1919fbfd6102a80c4fccd9ff5f47f94ba154074afe1051903261" +checksum = "8cce1c41a6cfaa726adee9ebb9a56fcd2bbfd8be49fd8a04c5e20fd968330b04" dependencies = [ "aws-smithy-http", "aws-smithy-types", @@ -386,9 +388,9 @@ dependencies = [ [[package]] name = "aws-http" -version = "0.54.1" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82976ca4e426ee9ca3ffcf919d9b2c8d14d0cd80d43cc02173737a8f07f28d4d" +checksum = "aadbc44e7a8f3e71c8b374e03ecd972869eb91dd2bc89ed018954a52ba84bc44" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -405,9 +407,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "0.24.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca0119bacf0c42f587506769390983223ba834e605f049babe514b2bd646dbb2" +checksum = "c8b812340d86d4a766b2ca73f740dfd47a97c2dff0c06c8517a16d88241957e4" dependencies = [ "aws-credential-types", "aws-endpoint", @@ -425,13 +427,14 @@ dependencies = [ "regex", "tokio-stream", "tower", + "tracing", ] [[package]] name = "aws-sdk-sts" -version = "0.24.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "270b6a33969ebfcb193512fbd5e8ee5306888ad6c6d5d775cdbfb2d50d94de26" +checksum = "265fac131fbfc188e5c3d96652ea90ecc676a934e3174eaaee523c6cec040b3b" dependencies = [ "aws-credential-types", "aws-endpoint", @@ -455,9 +458,9 @@ dependencies = [ [[package]] name = "aws-sig-auth" -version = "0.54.1" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "660a02a98ab1af83bd8d714afbab2d502ba9b18c49e7e4cddd6bf8837ff778cb" +checksum = "3b94acb10af0c879ecd5c7bdf51cda6679a0a4f4643ce630905a77673bfa3c61" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -469,9 +472,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "0.54.2" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86529e7b64d902efea8fff52c1b2529368d04f90305cf632729e3713f6b57dc0" +checksum = "9d2ce6f507be68e968a33485ced670111d1cbad161ddbbab1e313c03d37d8f4c" dependencies = [ "aws-smithy-http", "form_urlencoded", @@ -488,9 +491,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "0.54.4" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63c712a28a4f2f2139759235c08bf98aca99d4fdf1b13c78c5f95613df0a5db9" +checksum = "13bda3996044c202d75b91afeb11a9afae9db9a721c6a7a427410018e286b880" dependencies = [ "futures-util", "pin-project-lite", @@ -500,9 +503,9 @@ dependencies = [ [[package]] name = "aws-smithy-client" -version = "0.54.4" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "104ca17f56cde00a10207169697dfe9c6810db339d52fb352707e64875b30a44" +checksum = "0a86aa6e21e86c4252ad6a0e3e74da9617295d8d6e374d552be7d3059c41cedd" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -516,6 +519,7 @@ dependencies = [ "hyper-rustls 0.23.2", "lazy_static", "pin-project-lite", + "rustls 0.20.8", "tokio", "tower", "tracing", @@ -523,9 +527,9 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.54.4" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "873f316f1833add0d3aa54ed1b0cd252ddd88c792a0cf839886400099971e844" +checksum = "2b3b693869133551f135e1f2c77cb0b8277d9e3e17feaf2213f735857c4f0d28" dependencies = [ "aws-smithy-types", "bytes", @@ -543,9 +547,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-tower" -version = "0.54.4" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f38231d3f5dac9ac7976f44e12803add1385119ffca9e5f050d8e980733d164" +checksum = "3ae4f6c5798a247fac98a867698197d9ac22643596dc3777f0c76b91917616b9" dependencies = [ "aws-smithy-http", "aws-smithy-types", @@ -559,18 +563,18 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.54.4" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bd83ff2b79e9f729746fcc8ad798676b68fe6ea72986571569a5306a277a182" +checksum = "23f9f42fbfa96d095194a632fbac19f60077748eba536eb0b9fecc28659807f8" dependencies = [ "aws-smithy-types", ] [[package]] name = "aws-smithy-query" -version = "0.54.4" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2f0445dafe9d2cd50b44339ae3c3ed46549aad8ac696c52ad660b3e7ae8682b" +checksum = "98819eb0b04020a1c791903533b638534ae6c12e2aceda3e6e6fba015608d51d" dependencies = [ "aws-smithy-types", "urlencoding", @@ -578,9 +582,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "0.54.4" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8161232eda10290f5136610a1eb9de56aceaccd70c963a26a260af20ac24794f" +checksum = "16a3d0bf4f324f4ef9793b86a1701d9700fbcdbd12a846da45eed104c634c6e8" dependencies = [ "base64-simd", "itoa", @@ -591,18 +595,18 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.54.4" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "343ffe9a9bb3f542675f4df0e0d5933513d6ad038ca3907ad1767ba690a99684" +checksum = "b1b9d12875731bd07e767be7baad95700c3137b56730ec9ddeedb52a5e5ca63b" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "0.54.1" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8f15b34253b68cde08e39b0627cc6101bcca64351229484b4743392c035d057" +checksum = "6dd209616cc8d7bfb82f87811a5c655dc97537f592689b18743bddf5dc5c4829" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1015,6 +1019,8 @@ version = "26.0.0" dependencies = [ "arrow", "async-trait", + "aws-config", + "aws-credential-types", "clap", "datafusion", "dirs", @@ -2032,18 +2038,17 @@ dependencies = [ [[package]] name = "object_store" -version = "0.5.6" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec9cd6ca25e796a49fa242876d1c4de36a24a6da5258e9f0bc062dbf5e81c53b" +checksum = "27c776db4f332b571958444982ff641d2531417a326ca368995073b639205d58" dependencies = [ "async-trait", - "aws-config", - "aws-credential-types", - "aws-types", "base64", "bytes", "chrono", "futures", + "humantime", + "hyper", "itertools", "parking_lot", "percent-encoding", @@ -2119,9 +2124,9 @@ dependencies = [ [[package]] name = "parquet" -version = "40.0.0" +version = "41.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6a656fcc17e641657c955742c689732684e096f790ff30865d9f8dcc39f7c4a" +checksum = "6880c32d81884ac4441d9f4b027df8561be23b54f3ac1e62086fa42753dd3faa" dependencies = [ "ahash", "arrow-array", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 16d5e023240f..a604e017b744 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -29,14 +29,16 @@ rust-version = "1.62" readme = "README.md" [dependencies] -arrow = "40.0.0" +arrow = "41.0.0" async-trait = "0.1.41" +aws-config = "0.55" +aws-credential-types = "0.55" clap = { version = "3", features = ["derive", "cargo"] } datafusion = { path = "../datafusion/core", version = "26.0.0" } dirs = "4.0.0" env_logger = "0.9" mimalloc = { version = "0.1", default-features = false } -object_store = { version = "0.5.5", features = ["aws", "gcp", "aws_profile"] } +object_store = { version = "0.6.1", features = ["aws", "gcp"] } parking_lot = { version = "0.12" } rustyline = "11.0" tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] } diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs index 1d8950256a6c..15d6d81b6dd4 100644 --- a/datafusion-cli/src/exec.rs +++ b/datafusion-cli/src/exec.rs @@ -185,7 +185,7 @@ async fn exec_and_print( let plan = ctx.state().create_logical_plan(&sql).await?; let df = match &plan { LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) => { - create_external_table(ctx, cmd)?; + create_external_table(ctx, cmd).await?; ctx.execute_logical_plan(plan).await? } _ => ctx.execute_logical_plan(plan).await?, @@ -197,7 +197,10 @@ async fn exec_and_print( Ok(()) } -fn create_external_table(ctx: &SessionContext, cmd: &CreateExternalTable) -> Result<()> { +async fn create_external_table( + ctx: &SessionContext, + cmd: &CreateExternalTable, +) -> Result<()> { let table_path = ListingTableUrl::parse(&cmd.location)?; let scheme = table_path.scheme(); let url: &Url = table_path.as_ref(); @@ -205,7 +208,7 @@ fn create_external_table(ctx: &SessionContext, cmd: &CreateExternalTable) -> Res // registering the cloud object store dynamically using cmd.options let store = match scheme { "s3" => { - let builder = get_s3_object_store_builder(url, cmd)?; + let builder = get_s3_object_store_builder(url, cmd).await?; Arc::new(builder.build()?) as Arc } "oss" => { diff --git a/datafusion-cli/src/object_storage.rs b/datafusion-cli/src/object_storage.rs index 8d29f80a4ba6..7b0968d40218 100644 --- a/datafusion-cli/src/object_storage.rs +++ b/datafusion-cli/src/object_storage.rs @@ -15,14 +15,20 @@ // specific language governing permissions and limitations // under the License. +use async_trait::async_trait; +use aws_credential_types::provider::ProvideCredentials; use datafusion::{ error::{DataFusionError, Result}, logical_expr::CreateExternalTable, }; -use object_store::{aws::AmazonS3Builder, gcp::GoogleCloudStorageBuilder}; +use object_store::aws::AwsCredential; +use object_store::{ + aws::AmazonS3Builder, gcp::GoogleCloudStorageBuilder, CredentialProvider, +}; +use std::sync::Arc; use url::Url; -pub fn get_s3_object_store_builder( +pub async fn get_s3_object_store_builder( url: &Url, cmd: &CreateExternalTable, ) -> Result { @@ -36,10 +42,29 @@ pub fn get_s3_object_store_builder( builder = builder .with_access_key_id(access_key_id) .with_secret_access_key(secret_access_key); - } - if let Some(session_token) = cmd.options.get("session_token") { - builder = builder.with_token(session_token); + if let Some(session_token) = cmd.options.get("session_token") { + builder = builder.with_token(session_token); + } + } else { + let config = aws_config::from_env().load().await; + if let Some(region) = config.region() { + builder = builder.with_region(region.to_string()); + } + + let credentials = config + .credentials_provider() + .ok_or_else(|| { + DataFusionError::ObjectStore(object_store::Error::Generic { + store: "S3", + source: format!("Failed to get S3 credentials from environment") + .into(), + }) + })? + .clone(); + + let credentials = Arc::new(S3CredentialProvider { credentials }); + builder = builder.with_credentials(credentials); } if let Some(region) = cmd.options.get("region") { @@ -49,6 +74,30 @@ pub fn get_s3_object_store_builder( Ok(builder) } +#[derive(Debug)] +struct S3CredentialProvider { + credentials: aws_credential_types::provider::SharedCredentialsProvider, +} + +#[async_trait] +impl CredentialProvider for S3CredentialProvider { + type Credential = AwsCredential; + + async fn get_credential(&self) -> object_store::Result> { + let creds = self.credentials.provide_credentials().await.map_err(|e| { + object_store::Error::Generic { + store: "S3", + source: Box::new(e), + } + })?; + Ok(Arc::new(AwsCredential { + key_id: creds.access_key_id().to_string(), + secret_key: creds.secret_access_key().to_string(), + token: creds.session_token().map(ToString::to_string), + })) + } +} + pub fn get_oss_object_store_builder( url: &Url, cmd: &CreateExternalTable, diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index b7bf1161f868..31595c980a30 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -51,7 +51,7 @@ futures = "0.3" log = "0.4" mimalloc = { version = "0.1", default-features = false } num_cpus = "1.13.0" -object_store = { version = "0.5.4", features = ["aws"] } +object_store = { version = "0.6.1", features = ["aws"] } prost = { version = "0.11", default-features = false } prost-derive = { version = "0.11", default-features = false } serde = { version = "1.0.136", features = ["derive"] } diff --git a/datafusion-examples/examples/flight_sql_server.rs b/datafusion-examples/examples/flight_sql_server.rs index db5e9ce51787..1cf288b7d24d 100644 --- a/datafusion-examples/examples/flight_sql_server.rs +++ b/datafusion-examples/examples/flight_sql_server.rs @@ -22,13 +22,18 @@ use arrow_flight::flight_descriptor::DescriptorType; use arrow_flight::flight_service_server::{FlightService, FlightServiceServer}; use arrow_flight::sql::server::FlightSqlService; use arrow_flight::sql::{ + ActionBeginSavepointRequest, ActionBeginSavepointResult, + ActionBeginTransactionRequest, ActionBeginTransactionResult, + ActionCancelQueryRequest, ActionCancelQueryResult, ActionClosePreparedStatementRequest, ActionCreatePreparedStatementRequest, - ActionCreatePreparedStatementResult, Any, CommandGetCatalogs, + ActionCreatePreparedStatementResult, ActionCreatePreparedSubstraitPlanRequest, + ActionEndSavepointRequest, ActionEndTransactionRequest, Any, CommandGetCatalogs, CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys, CommandGetImportedKeys, CommandGetPrimaryKeys, CommandGetSqlInfo, CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo, CommandPreparedStatementQuery, CommandPreparedStatementUpdate, CommandStatementQuery, - CommandStatementUpdate, ProstMessageExt, SqlInfo, TicketStatementQuery, + CommandStatementSubstraitPlan, CommandStatementUpdate, ProstMessageExt, SqlInfo, + TicketStatementQuery, }; use arrow_flight::{ Action, FlightData, FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest, @@ -251,6 +256,17 @@ impl FlightSqlService for FlightSqlServiceImpl { Err(Status::unimplemented("Implement get_flight_info_statement")) } + async fn get_flight_info_substrait_plan( + &self, + _query: CommandStatementSubstraitPlan, + _request: Request, + ) -> Result, Status> { + info!("get_flight_info_substrait_plan"); + Err(Status::unimplemented( + "Implement get_flight_info_substrait_plan", + )) + } + async fn get_flight_info_prepared_statement( &self, cmd: CommandPreparedStatementQuery, @@ -312,6 +328,7 @@ impl FlightSqlService for FlightSqlServiceImpl { endpoint: endpoints, total_records: -1_i64, total_bytes: -1_i64, + ordered: false, }; let resp = Response::new(info); Ok(resp) @@ -558,6 +575,17 @@ impl FlightSqlService for FlightSqlServiceImpl { Ok(-1) } + async fn do_put_substrait_plan( + &self, + _query: CommandStatementSubstraitPlan, + _request: Request>, + ) -> Result { + info!("do_put_prepared_statement_update"); + Err(Status::unimplemented( + "Implement do_put_prepared_statement_update", + )) + } + async fn do_action_create_prepared_statement( &self, query: ActionCreatePreparedStatementRequest, @@ -598,13 +626,72 @@ impl FlightSqlService for FlightSqlServiceImpl { &self, handle: ActionClosePreparedStatementRequest, _request: Request, - ) { + ) -> Result<(), Status> { let handle = std::str::from_utf8(&handle.prepared_statement_handle); if let Ok(handle) = handle { info!("do_action_close_prepared_statement: removing plan and results for {handle}"); let _ = self.remove_plan(handle); let _ = self.remove_result(handle); } + Ok(()) + } + + async fn do_action_create_prepared_substrait_plan( + &self, + _query: ActionCreatePreparedSubstraitPlanRequest, + _request: Request, + ) -> Result { + info!("do_action_create_prepared_substrait_plan"); + Err(Status::unimplemented( + "Implement do_action_create_prepared_substrait_plan", + )) + } + + async fn do_action_begin_transaction( + &self, + _query: ActionBeginTransactionRequest, + _request: Request, + ) -> Result { + info!("do_action_begin_transaction"); + Err(Status::unimplemented( + "Implement do_action_begin_transaction", + )) + } + + async fn do_action_end_transaction( + &self, + _query: ActionEndTransactionRequest, + _request: Request, + ) -> Result<(), Status> { + info!("do_action_end_transaction"); + Err(Status::unimplemented("Implement do_action_end_transaction")) + } + + async fn do_action_begin_savepoint( + &self, + _query: ActionBeginSavepointRequest, + _request: Request, + ) -> Result { + info!("do_action_begin_savepoint"); + Err(Status::unimplemented("Implement do_action_begin_savepoint")) + } + + async fn do_action_end_savepoint( + &self, + _query: ActionEndSavepointRequest, + _request: Request, + ) -> Result<(), Status> { + info!("do_action_end_savepoint"); + Err(Status::unimplemented("Implement do_action_end_savepoint")) + } + + async fn do_action_cancel_query( + &self, + _query: ActionCancelQueryRequest, + _request: Request, + ) -> Result { + info!("do_action_cancel_query"); + Err(Status::unimplemented("Implement do_action_cancel_query")) } async fn register_sql_info(&self, _id: i32, _result: &SqlInfo) {} diff --git a/datafusion-examples/examples/parquet_sql_multiple_files.rs b/datafusion-examples/examples/parquet_sql_multiple_files.rs index d4893818efcf..7bd35a7844fc 100644 --- a/datafusion-examples/examples/parquet_sql_multiple_files.rs +++ b/datafusion-examples/examples/parquet_sql_multiple_files.rs @@ -40,7 +40,7 @@ async fn main() -> Result<()> { // for the query ctx.register_listing_table( "my_table", - &format!("file://{testdata}"), + &format!("file://{testdata}/alltypes_plain.parquet"), listing_options, None, None, diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index a259fd80f465..6943fc7263e2 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -43,9 +43,9 @@ arrow = { workspace = true } arrow-array = { workspace = true } chrono = { version = "0.4", default-features = false } num_cpus = "1.13.0" -object_store = { version = "0.5.4", default-features = false, optional = true } +object_store = { version = "0.6.1", default-features = false, optional = true } parquet = { workspace = true, optional = true } -pyo3 = { version = "0.18.0", optional = true } +pyo3 = { version = "0.19.0", optional = true } sqlparser = "0.34" [dev-dependencies] diff --git a/datafusion/common/src/pyarrow.rs b/datafusion/common/src/pyarrow.rs index 14f5380fa6e1..d18782e037ae 100644 --- a/datafusion/common/src/pyarrow.rs +++ b/datafusion/common/src/pyarrow.rs @@ -17,22 +17,23 @@ //! PyArrow -use crate::{DataFusionError, ScalarValue}; use arrow::array::ArrayData; -use arrow::pyarrow::PyArrowConvert; +use arrow::pyarrow::{FromPyArrow, ToPyArrow}; use arrow_array::Array; use pyo3::exceptions::PyException; use pyo3::prelude::PyErr; use pyo3::types::PyList; use pyo3::{FromPyObject, IntoPy, PyAny, PyObject, PyResult, Python}; +use crate::{DataFusionError, ScalarValue}; + impl From for PyErr { fn from(err: DataFusionError) -> PyErr { PyException::new_err(err.to_string()) } } -impl PyArrowConvert for ScalarValue { +impl FromPyArrow for ScalarValue { fn from_pyarrow(value: &PyAny) -> PyResult { let py = value.py(); let typ = value.getattr("type")?; @@ -49,7 +50,9 @@ impl PyArrowConvert for ScalarValue { Ok(scalar) } +} +impl ToPyArrow for ScalarValue { fn to_pyarrow(&self, py: Python) -> PyResult { let array = self.to_array(); // convert to pyarrow array using C data interface @@ -74,11 +77,12 @@ impl IntoPy for ScalarValue { #[cfg(test)] mod tests { - use super::*; use pyo3::prepare_freethreaded_python; use pyo3::py_run; use pyo3::types::PyDict; + use super::*; + fn init_python() { prepare_freethreaded_python(); Python::with_gil(|py| { diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index afdb755939e6..2764c5f6f155 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -78,7 +78,7 @@ lazy_static = { version = "^1.4.0" } log = "^0.4" num-traits = { version = "0.2", optional = true } num_cpus = "1.13.0" -object_store = "0.5.4" +object_store = "0.6.1" parking_lot = "0.12" parquet = { workspace = true } percent-encoding = "2.2.0" diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index cfbde4110985..362a681e4f55 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -458,6 +458,7 @@ mod tests { location: Path::parse("/")?, last_modified: DateTime::default(), size: usize::MAX, + e_tag: None, }; let num_rows_to_read = 100; diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs index cffd6b9cafac..90037d32f549 100644 --- a/datafusion/core/src/datasource/file_format/mod.rs +++ b/datafusion/core/src/datasource/file_format/mod.rs @@ -103,7 +103,7 @@ pub(crate) mod test_util { use futures::StreamExt; use object_store::local::LocalFileSystem; use object_store::path::Path; - use object_store::{GetResult, ListResult, MultipartId}; + use object_store::{GetOptions, GetResult, ListResult, MultipartId}; use tokio::io::AsyncWrite; pub async fn scan_format( @@ -201,11 +201,11 @@ pub(crate) mod test_util { )) } - async fn get_range( + async fn get_opts( &self, _location: &Path, - _range: Range, - ) -> object_store::Result { + _opts: GetOptions, + ) -> object_store::Result { unimplemented!() } diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 767a6a22a42f..6957f367c9d2 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -599,7 +599,7 @@ pub(crate) mod test_util { } //// write batches chunk_size rows at a time - fn write_in_chunks( + fn write_in_chunks( writer: &mut ArrowWriter, batch: &RecordBatch, chunk_size: usize, @@ -618,7 +618,6 @@ mod tests { use super::super::test_util::scan_format; use crate::physical_plan::collect; use std::fmt::{Display, Formatter}; - use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; use super::*; @@ -641,7 +640,7 @@ mod tests { use log::error; use object_store::local::LocalFileSystem; use object_store::path::Path; - use object_store::{GetResult, ListResult, MultipartId}; + use object_store::{GetOptions, GetResult, ListResult, MultipartId}; use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::arrow::ParquetRecordBatchStreamBuilder; use parquet::file::metadata::{ParquetColumnIndex, ParquetOffsetIndex}; @@ -739,17 +738,13 @@ mod tests { Err(object_store::Error::NotImplemented) } - async fn get(&self, _location: &Path) -> object_store::Result { - Err(object_store::Error::NotImplemented) - } - - async fn get_range( + async fn get_opts( &self, location: &Path, - range: Range, - ) -> object_store::Result { + options: GetOptions, + ) -> object_store::Result { self.request_count.fetch_add(1, Ordering::SeqCst); - self.inner.get_range(location, range).await + self.inner.get_opts(location, options).await } async fn head(&self, _location: &Path) -> object_store::Result { diff --git a/datafusion/core/src/datasource/listing/mod.rs b/datafusion/core/src/datasource/listing/mod.rs index c7a1761151b0..a434a081e8b6 100644 --- a/datafusion/core/src/datasource/listing/mod.rs +++ b/datafusion/core/src/datasource/listing/mod.rs @@ -80,6 +80,7 @@ impl PartitionedFile { location: Path::from(path), last_modified: chrono::Utc.timestamp_nanos(0), size: size as usize, + e_tag: None, }, partition_values: vec![], range: None, @@ -94,6 +95,7 @@ impl PartitionedFile { location: Path::from(path), last_modified: chrono::Utc.timestamp_nanos(0), size: size as usize, + e_tag: None, }, partition_values: vec![], range: Some(FileRange { start, end }), diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index bea3f837cbeb..76711e6bfc72 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -1400,6 +1400,7 @@ mod tests { .unwrap() .into(), size: 1024, + e_tag: None, }; let cache = StatisticsCache::default(); diff --git a/datafusion/core/src/physical_plan/file_format/chunked_store.rs b/datafusion/core/src/physical_plan/file_format/chunked_store.rs index 49f5e5a93a4d..05528ed8a2b6 100644 --- a/datafusion/core/src/physical_plan/file_format/chunked_store.rs +++ b/datafusion/core/src/physical_plan/file_format/chunked_store.rs @@ -20,7 +20,7 @@ use bytes::{BufMut, Bytes, BytesMut}; use futures::stream::BoxStream; use futures::StreamExt; use object_store::path::Path; -use object_store::{GetResult, ListResult, ObjectMeta, ObjectStore}; +use object_store::{GetOptions, GetResult, ListResult, ObjectMeta, ObjectStore}; use object_store::{MultipartId, Result}; use std::fmt::{Debug, Display, Formatter}; use std::ops::Range; @@ -151,6 +151,10 @@ impl ObjectStore for ChunkedStore { } } + async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { + self.inner.get_opts(location, options).await + } + async fn get_range(&self, location: &Path, range: Range) -> Result { self.inner.get_range(location, range).await } diff --git a/datafusion/core/src/physical_plan/file_format/mod.rs b/datafusion/core/src/physical_plan/file_format/mod.rs index f67636e6af1d..831f81e5997a 100644 --- a/datafusion/core/src/physical_plan/file_format/mod.rs +++ b/datafusion/core/src/physical_plan/file_format/mod.rs @@ -1312,6 +1312,7 @@ mod tests { location: object_store::path::Path::parse(path).unwrap(), last_modified: Utc::now(), size: 42, + e_tag: None, }; PartitionedFile { diff --git a/datafusion/core/src/physical_plan/file_format/parquet.rs b/datafusion/core/src/physical_plan/file_format/parquet.rs index a281f60a940a..a0c7402cc462 100644 --- a/datafusion/core/src/physical_plan/file_format/parquet.rs +++ b/datafusion/core/src/physical_plan/file_format/parquet.rs @@ -1724,6 +1724,7 @@ mod tests { location, last_modified: Utc.timestamp_nanos(0), size: 1337, + e_tag: None, }, partition_values: vec![], range: None, diff --git a/datafusion/core/src/test/object_store.rs b/datafusion/core/src/test/object_store.rs index 3e2d1fdae063..425d0724ea4f 100644 --- a/datafusion/core/src/test/object_store.rs +++ b/datafusion/core/src/test/object_store.rs @@ -51,5 +51,6 @@ pub fn local_unpartitioned_file(path: impl AsRef) -> ObjectMeta location, last_modified: metadata.modified().map(chrono::DateTime::from).unwrap(), size: metadata.len() as usize, + e_tag: None, } } diff --git a/datafusion/core/src/test_util/parquet.rs b/datafusion/core/src/test_util/parquet.rs index 17766142bd1d..ed65c4122cd8 100644 --- a/datafusion/core/src/test_util/parquet.rs +++ b/datafusion/core/src/test_util/parquet.rs @@ -109,6 +109,7 @@ impl TestParquetFile { location: Path::parse(canonical_path.to_str().unwrap_or_default())?, last_modified: Default::default(), size, + e_tag: None, }; Ok(Self { diff --git a/datafusion/core/tests/parquet/custom_reader.rs b/datafusion/core/tests/parquet/custom_reader.rs index 501623dbd945..302baca51678 100644 --- a/datafusion/core/tests/parquet/custom_reader.rs +++ b/datafusion/core/tests/parquet/custom_reader.rs @@ -185,6 +185,7 @@ async fn store_parquet_in_memory( .expect("creating path"), last_modified: chrono::DateTime::from(SystemTime::now()), size: buf.len(), + e_tag: None, }; (meta, Bytes::from(buf)) diff --git a/datafusion/core/tests/parquet/page_pruning.rs b/datafusion/core/tests/parquet/page_pruning.rs index 8be7aeb0ca64..1c912883c784 100644 --- a/datafusion/core/tests/parquet/page_pruning.rs +++ b/datafusion/core/tests/parquet/page_pruning.rs @@ -47,6 +47,7 @@ async fn get_parquet_exec(state: &SessionState, filter: Expr) -> ParquetExec { location, last_modified: metadata.modified().map(chrono::DateTime::from).unwrap(), size: metadata.len() as usize, + e_tag: None, }; let schema = ParquetFormat::default() diff --git a/datafusion/core/tests/parquet/schema_coercion.rs b/datafusion/core/tests/parquet/schema_coercion.rs index b5ceabad1d3e..e7b1584e2155 100644 --- a/datafusion/core/tests/parquet/schema_coercion.rs +++ b/datafusion/core/tests/parquet/schema_coercion.rs @@ -192,5 +192,6 @@ pub fn local_unpartitioned_file(path: impl AsRef) -> ObjectMeta location, last_modified: metadata.modified().map(chrono::DateTime::from).unwrap(), size: metadata.len() as usize, + e_tag: None, } } diff --git a/datafusion/core/tests/path_partition.rs b/datafusion/core/tests/path_partition.rs index afaac5a7bdaa..894ceb1b9800 100644 --- a/datafusion/core/tests/path_partition.rs +++ b/datafusion/core/tests/path_partition.rs @@ -43,7 +43,7 @@ use datafusion_common::ScalarValue; use futures::stream; use futures::stream::BoxStream; use object_store::{ - path::Path, GetResult, ListResult, MultipartId, ObjectMeta, ObjectStore, + path::Path, GetOptions, GetResult, ListResult, MultipartId, ObjectMeta, ObjectStore, }; use tokio::io::AsyncWrite; use url::Url; @@ -640,7 +640,11 @@ impl ObjectStore for MirroringObjectStore { unimplemented!() } - async fn get(&self, location: &Path) -> object_store::Result { + async fn get_opts( + &self, + location: &Path, + _options: GetOptions, + ) -> object_store::Result { self.files.iter().find(|x| *x == location).unwrap(); let path = std::path::PathBuf::from(&self.mirrored_file); let file = File::open(&path).unwrap(); @@ -671,6 +675,7 @@ impl ObjectStore for MirroringObjectStore { location: location.clone(), last_modified: Utc.timestamp_nanos(0), size: self.file_size as usize, + e_tag: None, }) } @@ -696,6 +701,7 @@ impl ObjectStore for MirroringObjectStore { location: location.clone(), last_modified: Utc.timestamp_nanos(0), size: self.file_size as usize, + e_tag: None, }) }) }, @@ -732,6 +738,7 @@ impl ObjectStore for MirroringObjectStore { location: k.clone(), last_modified: Utc.timestamp_nanos(0), size: self.file_size as usize, + e_tag: None, }; objects.push(object); } diff --git a/datafusion/execution/Cargo.toml b/datafusion/execution/Cargo.toml index 2f8656df09a3..d6203152714e 100644 --- a/datafusion/execution/Cargo.toml +++ b/datafusion/execution/Cargo.toml @@ -38,7 +38,7 @@ datafusion-common = { path = "../common", version = "26.0.0" } datafusion-expr = { path = "../expr", version = "26.0.0" } hashbrown = { version = "0.13", features = ["raw"] } log = "^0.4" -object_store = "0.5.4" +object_store = "0.6.1" parking_lot = "0.12" rand = "0.8" tempfile = "3" diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index c3f5227883c5..70137f63cf15 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -45,7 +45,7 @@ chrono = { version = "0.4", default-features = false } datafusion = { path = "../core", version = "26.0.0" } datafusion-common = { path = "../common", version = "26.0.0" } datafusion-expr = { path = "../expr", version = "26.0.0" } -object_store = { version = "0.5.4" } +object_store = { version = "0.6.1" } pbjson = { version = "0.5", optional = true } prost = "0.11.0" serde = { version = "1.0", optional = true } diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index 8af481518192..42672e504901 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -475,6 +475,7 @@ impl TryFrom<&protobuf::PartitionedFile> for PartitionedFile { location: Path::from(val.path.as_str()), last_modified: Utc.timestamp_nanos(val.last_modified_ns as i64), size: val.size as usize, + e_tag: None, }, partition_values: val .partition_values diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 8a20d5d20f5f..05300f2aaa87 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -32,7 +32,7 @@ async-recursion = "1.0" chrono = { version = "0.4.23", default-features = false } datafusion = { version = "26.0.0", path = "../core" } itertools = "0.10.5" -object_store = "0.5.4" +object_store = "0.6.1" prost = "0.11" prost-types = "0.11" substrait = "0.10.0" diff --git a/datafusion/substrait/src/physical_plan/consumer.rs b/datafusion/substrait/src/physical_plan/consumer.rs index 6273e0e58932..1de3937c5926 100644 --- a/datafusion/substrait/src/physical_plan/consumer.rs +++ b/datafusion/substrait/src/physical_plan/consumer.rs @@ -91,6 +91,7 @@ pub async fn from_substrait_rel( last_modified: last_modified.into(), location: path.into(), size, + e_tag: None, }, partition_values: vec![], range: None,