From 065892fb1fad4f249befa1fe3327b4107d0fd7a6 Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Sun, 4 Dec 2022 22:42:14 -0800 Subject: [PATCH 1/2] read special version of dataset --- cpp/src/lance/arrow/dataset.cc | 11 ++++++++++- .../duckdb/src/lance/duckdb/lance_reader.cc | 18 ++++++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/cpp/src/lance/arrow/dataset.cc b/cpp/src/lance/arrow/dataset.cc index 00e89791cb..aa6a13efb2 100644 --- a/cpp/src/lance/arrow/dataset.cc +++ b/cpp/src/lance/arrow/dataset.cc @@ -313,7 +313,16 @@ ::arrow::Result> LanceDataset::Make( if (finfo.type() == ::arrow::fs::FileType::NotFound) { return ::arrow::Status::IOError("Manifest not found: ", manifest_path); } - ARROW_ASSIGN_OR_RAISE(auto manifest, OpenManifest(fs, manifest_path)); + auto result = OpenManifest(fs, manifest_path); + + if (result.status().IsIOError() && result.status().message().starts_with("Path does not exist")) { + if (!version) { + return ::arrow::Status::IOError("Can not find the latest version of the dataset"); + } + return ::arrow::Status::IOError("Version ", version.value(), " does not exist"); + } + + ARROW_ASSIGN_OR_RAISE(auto manifest, result); auto impl = std::make_unique(fs, base_uri, manifest); return std::shared_ptr(new LanceDataset(std::move(impl))); } diff --git a/integration/duckdb/src/lance/duckdb/lance_reader.cc b/integration/duckdb/src/lance/duckdb/lance_reader.cc index 9bc93f7a14..ca1a9a8ea4 100644 --- a/integration/duckdb/src/lance/duckdb/lance_reader.cc +++ b/integration/duckdb/src/lance/duckdb/lance_reader.cc @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -59,9 +60,13 @@ std::unique_ptr<::duckdb::FunctionData> LanceScanBind( std::vector<::duckdb::LogicalType> &return_types, std::vector &names) { auto dataset_uri = input.inputs[0].GetValue(); + std::optional version = std::nullopt; + if (input.inputs.size() > 1) { + version = input.inputs[1].GetValue(); + } std::string path; auto fs = GetResult(::arrow::fs::FileSystemFromUriOrPath(dataset_uri, &path)); - auto dataset = GetResult(lance::arrow::LanceDataset::Make(std::move(fs), path)); + auto dataset = GetResult(lance::arrow::LanceDataset::Make(std::move(fs), path, version)); auto schema = dataset->schema(); auto bind_data = std::make_unique(); bind_data->dataset = std::move(dataset); @@ -282,8 +287,17 @@ ::duckdb::TableFunctionSet GetLanceReaderFunction() { table_function.projection_pushdown = true; table_function.filter_pushdown = true; table_function.filter_prune = true; - func_set.AddFunction(table_function); + + ::duckdb::TableFunction scan_with_version( + {::duckdb::LogicalType::VARCHAR, ::duckdb::LogicalType::INTEGER}, + LanceScan, + LanceScanBind, + InitGlobal); + scan_with_version.projection_pushdown = true; + scan_with_version.filter_pushdown = true; + scan_with_version.filter_prune = true; + func_set.AddFunction(scan_with_version); return func_set; } From 1ba865e039bc33bddbbc7618e4617a8d236c2d7b Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Sun, 4 Dec 2022 22:52:56 -0800 Subject: [PATCH 2/2] reduce code --- .../duckdb/src/lance/duckdb/lance_reader.cc | 26 +++++++------------ 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/integration/duckdb/src/lance/duckdb/lance_reader.cc b/integration/duckdb/src/lance/duckdb/lance_reader.cc index ca1a9a8ea4..3b4109914c 100644 --- a/integration/duckdb/src/lance/duckdb/lance_reader.cc +++ b/integration/duckdb/src/lance/duckdb/lance_reader.cc @@ -282,22 +282,16 @@ void LanceScan(::duckdb::ClientContext &context, ::duckdb::TableFunctionSet GetLanceReaderFunction() { ::duckdb::TableFunctionSet func_set("lance_scan"); - ::duckdb::TableFunction table_function( - {::duckdb::LogicalType::VARCHAR}, LanceScan, LanceScanBind, InitGlobal); - table_function.projection_pushdown = true; - table_function.filter_pushdown = true; - table_function.filter_prune = true; - func_set.AddFunction(table_function); - - ::duckdb::TableFunction scan_with_version( - {::duckdb::LogicalType::VARCHAR, ::duckdb::LogicalType::INTEGER}, - LanceScan, - LanceScanBind, - InitGlobal); - scan_with_version.projection_pushdown = true; - scan_with_version.filter_pushdown = true; - scan_with_version.filter_prune = true; - func_set.AddFunction(scan_with_version); + for (auto &arguments : std::vector>( + {{::duckdb::LogicalType::VARCHAR}, + {::duckdb::LogicalType::VARCHAR, ::duckdb::LogicalType::INTEGER}})) { + ::duckdb::TableFunction table_function( + arguments, LanceScan, LanceScanBind, InitGlobal); + table_function.projection_pushdown = true; + table_function.filter_pushdown = true; + table_function.filter_prune = true; + func_set.AddFunction(table_function); + } return func_set; }