diff --git a/.clang-format b/.clang-format index 9f379d799b..8dab6cde60 100644 --- a/.clang-format +++ b/.clang-format @@ -2,6 +2,7 @@ BasedOnStyle: Google ColumnLimit: 100 BinPackArguments: false BinPackParameters: false +ReferenceAlignment: Left --- Language: Proto BasedOnStyle: Google \ No newline at end of file diff --git a/.github/workflows/duckdb.yml b/.github/workflows/duckdb.yml index ed3a646641..8b15ec07ce 100644 --- a/.github/workflows/duckdb.yml +++ b/.github/workflows/duckdb.yml @@ -14,10 +14,20 @@ jobs: defaults: run: working-directory: ./integration/duckdb + env: + ArrowVersion: 10.0.1-1 steps: - uses: actions/checkout@v2 - name: ccache uses: hendrikmuhs/ccache-action@v1 + - name: Install dependencies + run: | + sudo apt update + sudo apt install -y -V ca-certificates lsb-release wget + wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt update + sudo apt install -y -V libarrow-dev=${ArrowVersion} libarrow-dataset-dev=${ArrowVersion} libparquet-dev=${ArrowVersion} - name: Cmake run: cmake -B build - name: Build @@ -30,6 +40,12 @@ jobs: working-directory: ./integration/duckdb steps: - uses: actions/checkout@v2 + - name: Install dependencies + run: | + brew update + cd $(brew --repository) + git checkout 3.6.8 # Arrow 10.0 + brew install apache-arrow - name: Cmake run: cmake -B build - name: Build diff --git a/cpp/src/lance/format/CMakeLists.txt b/cpp/src/lance/format/CMakeLists.txt index dc8eb312b2..865b8b3be9 100644 --- a/cpp/src/lance/format/CMakeLists.txt +++ b/cpp/src/lance/format/CMakeLists.txt @@ -16,7 +16,7 @@ protobuf_generate_cpp( PROTO_SRCS PROTO_HDRS - ${CMAKE_SOURCE_DIR}/../protos/format.proto + ${PROJECT_SOURCE_DIR}/../protos/format.proto ) add_library( diff --git a/integration/duckdb/CMakeLists.txt 
b/integration/duckdb/CMakeLists.txt index 934a0d9779..b27cb2cdb6 100644 --- a/integration/duckdb/CMakeLists.txt +++ b/integration/duckdb/CMakeLists.txt @@ -4,7 +4,7 @@ if(POLICY CMP0135) cmake_policy(SET CMP0135 NEW) endif() -add_compile_options(-mf16c) # opencv +#add_compile_options(-mf16c) # opencv project(lance_duckdb CXX) option(LANCE_BUILD_PYTORCH "Build with PyTorch" TRUE) @@ -88,7 +88,7 @@ endif() FetchContent_MakeAvailable(${available_contents}) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED True) include_directories(${duckdb_SOURCE_DIR}/src/include) @@ -109,12 +109,24 @@ if(LANCE_BUILD_PYTORCH) include_directories(${OpenCV_INCLUDE_DIRS}) endif() +# Add lance core as dependency +find_package(Arrow REQUIRED) +find_package(ArrowDataset REQUIRED) +include_directories(${CMAKE_BINARY_DIR}/lance/src ../../cpp/include ../../cpp/src) +add_subdirectory(../../cpp lance) + include_directories(src) set(LANCE_EXT_SOURCE_COMMON + src/lance/duckdb/lance_reader.cc + src/lance/duckdb/lance_reader.h src/lance/duckdb/lance-extension.cc + src/lance/duckdb/lance.cc + src/lance/duckdb/lance.h src/lance/duckdb/list_functions.cc - src/lance/duckdb/vector_functions.cc) + src/lance/duckdb/list_functions.h + src/lance/duckdb/vector_functions.cc +) set(LANCE_EXT_SOURCE_ML src/lance/duckdb/ml/catalog.cc @@ -132,6 +144,7 @@ endif() # add_library(lance_extension STATIC ${LANCE_EXT_SOURCES}) set(PARAMETERS "-warnings") build_loadable_extension(lance ${PARAMETERS} ${LANCE_EXT_SOURCES}) +target_link_libraries(lance_loadable_extension lance ArrowDataset::arrow_dataset_shared fmt::fmt) if(LANCE_BUILD_PYTORCH) target_link_libraries(lance_loadable_extension "${TORCH_LIBRARIES}" diff --git a/integration/duckdb/src/lance/duckdb/lance-extension.cc b/integration/duckdb/src/lance/duckdb/lance-extension.cc index 30bbe6f46b..57c37c63ed 100644 --- a/integration/duckdb/src/lance/duckdb/lance-extension.cc +++ 
b/integration/duckdb/src/lance/duckdb/lance-extension.cc @@ -18,9 +18,10 @@ #include +#include "lance/duckdb/lance_reader.h" #include "lance/duckdb/list_functions.h" -#include "lance/duckdb/vector_functions.h" #include "lance/duckdb/ml/functions.h" +#include "lance/duckdb/vector_functions.h" namespace duckdb { @@ -29,6 +30,7 @@ void LanceExtension::Load(::duckdb::DuckDB &db) { con.BeginTransaction(); auto &context = *con.context; auto &catalog = ::duckdb::Catalog::GetCatalog(context); + auto &config = DBConfig::GetConfig(*db.instance); for (auto &func : lance::duckdb::GetListFunctions()) { catalog.CreateFunction(context, func.get()); @@ -46,11 +48,17 @@ void LanceExtension::Load(::duckdb::DuckDB &db) { catalog.CreateTableFunction(context, func.get()); } + auto scan_func = lance::duckdb::GetLanceReaderFunction(); + ::duckdb::CreateTableFunctionInfo scan(scan_func); + catalog.CreateTableFunction(context, &scan); + + config.replacement_scans.emplace_back(lance::duckdb::LanceScanReplacement); + con.Commit(); } std::string LanceExtension::Name() { return {"lance"}; } -}; +}; // namespace duckdb extern "C" { diff --git a/integration/duckdb/src/lance/duckdb/lance-extension.h b/integration/duckdb/src/lance/duckdb/lance-extension.h index 57ff598a8e..1178111f42 100644 --- a/integration/duckdb/src/lance/duckdb/lance-extension.h +++ b/integration/duckdb/src/lance/duckdb/lance-extension.h @@ -22,7 +22,9 @@ namespace duckdb { class LanceExtension : public Extension { public: + void Load(DuckDB &db) override; + std::string Name() override; }; diff --git a/integration/duckdb/src/lance/duckdb/lance.cc b/integration/duckdb/src/lance/duckdb/lance.cc new file mode 100644 index 0000000000..371f710eef --- /dev/null +++ b/integration/duckdb/src/lance/duckdb/lance.cc @@ -0,0 +1,98 @@ +// Copyright 2022 Lance Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "lance/duckdb/lance.h"
+
+#include
+
+#include
+#include
+
+namespace lance::duckdb {
+
+namespace {
+
+/// A dictionary-encoded type is exposed as the logical type of its values.
+inline ::duckdb::LogicalType ToLogicalType(const ::arrow::DictionaryType& dtype) {
+  return lance::duckdb::ToLogicalType(*dtype.value_type());
+}
+
+/// Build a DuckDB STRUCT whose children mirror the Arrow struct fields, in order.
+inline ::duckdb::LogicalType ToLogicalType(const ::arrow::StructType& struct_type) {
+  ::duckdb::child_list_t<::duckdb::LogicalType> children;
+  for (auto& child : struct_type.fields()) {
+    children.emplace_back(
+        std::make_pair(child->name(), lance::duckdb::ToLogicalType(*child->type())));
+  }
+  return ::duckdb::LogicalType::STRUCT(children);
+}
+
+/// Build a DuckDB LIST from an Arrow list-like type.
+///
+/// \tparam ArrowListType the concrete Arrow type `dtype` is cast to; it must provide
+///         `value_type()` (used below with `ListType` and `FixedSizeListType`).
+template <typename ArrowListType>
+inline ::duckdb::LogicalType ToLogicalType(const ::arrow::DataType& dtype) {
+  auto& list_type = dynamic_cast<const ArrowListType&>(dtype);
+  auto child_type = lance::duckdb::ToLogicalType(*list_type.value_type());
+  return ::duckdb::LogicalType::LIST(child_type);
+}
+
+}  // namespace
+
+/// Map an Arrow data type to the DuckDB logical type used to expose it.
+///
+/// Half floats are reported as FLOAT, both string widths as VARCHAR, both binary
+/// widths as BLOB, and list / fixed-size list as LIST. Dictionary, struct and list
+/// types recurse into their value / child types.
+///
+/// \param arrow_type the Arrow type to translate.
+/// \return the corresponding DuckDB logical type.
+/// \throws ::duckdb::InvalidInputException if the Arrow type has no case below.
+::duckdb::LogicalType ToLogicalType(const ::arrow::DataType& arrow_type) {
+  switch (arrow_type.id()) {
+    case ::arrow::Type::BOOL:
+      return ::duckdb::LogicalType::BOOLEAN;
+    case ::arrow::Type::INT8:
+      return ::duckdb::LogicalType::TINYINT;
+    case ::arrow::Type::UINT8:
+      return ::duckdb::LogicalType::UTINYINT;
+    case ::arrow::Type::INT16:
+      return ::duckdb::LogicalType::SMALLINT;
+    case ::arrow::Type::UINT16:
+      return ::duckdb::LogicalType::USMALLINT;
+    case ::arrow::Type::INT32:
+      return ::duckdb::LogicalType::INTEGER;
+    // 32-bit unsigned is UINTEGER; the 64-bit integers need the BIGINT family so that
+    // values are not declared with a narrower type than the one they are stored in.
+    case ::arrow::Type::UINT32:
+      return ::duckdb::LogicalType::UINTEGER;
+    case ::arrow::Type::INT64:
+      return ::duckdb::LogicalType::BIGINT;
+    case ::arrow::Type::UINT64:
+      return ::duckdb::LogicalType::UBIGINT;
+    case ::arrow::Type::FLOAT:
+    case ::arrow::Type::HALF_FLOAT:
+      return ::duckdb::LogicalType::FLOAT;
+    case ::arrow::Type::DOUBLE:
+      return ::duckdb::LogicalType::DOUBLE;
+    case ::arrow::Type::STRING:
+    case ::arrow::Type::LARGE_STRING:
+      return ::duckdb::LogicalType::VARCHAR;
+    case ::arrow::Type::BINARY:
+    case ::arrow::Type::LARGE_BINARY:
+      return ::duckdb::LogicalType::BLOB;
+    case ::arrow::Type::TIME32:
+    case ::arrow::Type::TIME64:
+      return ::duckdb::LogicalType::TIME;
+    case ::arrow::Type::TIMESTAMP:
+      return ::duckdb::LogicalType::TIMESTAMP;
+    case ::arrow::Type::DATE32:
+    case ::arrow::Type::DATE64:
+      return ::duckdb::LogicalType::DATE;
+    case ::arrow::Type::DICTIONARY:
+      return ToLogicalType(dynamic_cast<const ::arrow::DictionaryType&>(arrow_type));
+    case ::arrow::Type::STRUCT:
+      return ToLogicalType(dynamic_cast<const ::arrow::StructType&>(arrow_type));
+    case ::arrow::Type::LIST:
+      return ToLogicalType<::arrow::ListType>(arrow_type);
+    case ::arrow::Type::FIXED_SIZE_LIST:
+      return ToLogicalType<::arrow::FixedSizeListType>(arrow_type);
+    default:
+      throw ::duckdb::InvalidInputException("Does not support type: %s",
+                                            arrow_type.ToString().c_str());
+  }
+}
+
+}  // namespace lance::duckdb
\ No newline at end of file
diff --git a/integration/duckdb/src/lance/duckdb/lance.h b/integration/duckdb/src/lance/duckdb/lance.h
new file mode 100644
index 0000000000..0fa4f7dede
--- /dev/null
+++ b/integration/duckdb/src/lance/duckdb/lance.h
@@ -0,0 +1,47 @@
+// Copyright 2022 Lance Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#pragma once
+
+/// \brief Lance Core Adaptors and utilities
+
+#include
+#include
+#include
+
+#include
+#include
+
+namespace lance::duckdb {
+
+/// Unwrap an `::arrow::Result`, translating failure into an exception.
+///
+/// Returns the contained value (moved out of `result`) when the result is OK;
+/// otherwise throws an `E` constructed from the status message.
+/// NOTE(review): the template parameter list is not visible here; `T` and `E` are
+/// presumably the value type and the exception type -- confirm.
+template
+T GetResult(::arrow::Result&& result) {
+  if (result.ok()) {
+    return std::move(result.ValueOrDie());
+  }
+  throw E(result.status().message());
+}
+
+/// Throw an `E` constructed from the status message when `status` is not OK;
+/// do nothing otherwise.
+template
+void CheckStatus(const ::arrow::Status& status) {
+  if (!status.ok()) {
+    throw E(status.message());
+  }
+}
+
+/// Convert Arrow and Lance types into DuckDB logical type
+::duckdb::LogicalType ToLogicalType(const ::arrow::DataType& arrow_type);
+
+}  // namespace lance::duckdb
diff --git a/integration/duckdb/src/lance/duckdb/lance_reader.cc b/integration/duckdb/src/lance/duckdb/lance_reader.cc
new file mode 100644
index 0000000000..9bc93f7a14
--- /dev/null
+++ b/integration/duckdb/src/lance/duckdb/lance_reader.cc
@@ -0,0 +1,307 @@
+// Copyright 2022 Lance Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "lance/duckdb/lance_reader.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "lance/arrow/type.h"
+#include "lance/duckdb/lance.h"
+
+namespace lance::duckdb {
+
+namespace {
+
+// Forward declaration: the struct and list converters below recurse through it.
+void ArrowArrayToVector(const std::shared_ptr<::arrow::Array> &arr, ::duckdb::Vector *out);
+
+/// Global state of one `lance_scan`: the dataset and the generator yielding its batches.
+struct GlobalScanState : public ::duckdb::GlobalTableFunctionState {
+  std::shared_ptr<lance::arrow::LanceDataset> dataset;
+  ::arrow::dataset::TaggedRecordBatchGenerator batch_generator;
+};
+
+/// Local scan state; carries no data.
+struct LocalScanState : public ::duckdb::LocalTableFunctionState {};
+
+/// BindData for Lance Scan
+struct ScanBindData : public ::duckdb::TableFunctionData {
+  /// Dataset opened at bind time and reused when the scan is initialised.
+  std::shared_ptr<lance::arrow::LanceDataset> dataset;
+};
+
+/// Bind callback: open the dataset named by the first argument and report its schema.
+///
+/// \param input bind input; `inputs[0]` is the dataset URI or path.
+/// \param return_types receives one DuckDB logical type per dataset field.
+/// \param names receives the field names, in schema order.
+/// \return bind data holding the opened dataset.
+std::unique_ptr<::duckdb::FunctionData> LanceScanBind(
+    ::duckdb::ClientContext &context,
+    ::duckdb::TableFunctionBindInput &input,
+    std::vector<::duckdb::LogicalType> &return_types,
+    std::vector<std::string> &names) {
+  auto dataset_uri = input.inputs[0].GetValue<std::string>();
+  std::string path;
+  auto fs = GetResult(::arrow::fs::FileSystemFromUriOrPath(dataset_uri, &path));
+  auto dataset = GetResult(lance::arrow::LanceDataset::Make(std::move(fs), path));
+  auto schema = dataset->schema();
+  auto bind_data = std::make_unique<ScanBindData>();
+  bind_data->dataset = std::move(dataset);
+  // `num_fields()` is an int, which keeps the loop free of signed/unsigned comparison.
+  for (int i = 0; i < schema->num_fields(); ++i) {
+    const auto &field = schema->field(i);
+    names.emplace_back(field->name());
+    return_types.emplace_back(ToLogicalType(*field->type()));
+    bind_data->column_ids.emplace_back(i);
+  }
+  return bind_data;
+}
+
+/// Global-init callback: start an asynchronous scan projected to the requested columns.
+///
+/// \throws ::duckdb::InternalException if the bind data is not a `ScanBindData`.
+std::unique_ptr<::duckdb::GlobalTableFunctionState> InitGlobal(
+    ::duckdb::ClientContext &context, ::duckdb::TableFunctionInitInput &input) {
+  auto bind_data = dynamic_cast<const ScanBindData *>(input.bind_data);
+  // An assert alone would vanish in release builds and leave a null dereference.
+  if (bind_data == nullptr) {
+    throw ::duckdb::InternalException("lance_scan: unexpected bind data");
+  }
+
+  auto state = std::make_unique<GlobalScanState>();
+  state->dataset = bind_data->dataset;
+
+  auto schema = state->dataset->schema();
+  std::vector<std::string> columns;
+  columns.reserve(input.column_ids.size());
+  // NOTE(review): every column id is used as a schema index; confirm DuckDB never passes
+  // a virtual (row-id) column id here, e.g. for `SELECT count(*)`.
+  for (auto &column_id : input.column_ids) {
+    columns.emplace_back(schema->field(column_id)->name());
+  }
+
+  auto builder = GetResult(state->dataset->NewScan());
+  CheckStatus(builder->Project(columns));
+  auto scanner = GetResult(builder->Finish());
+  state->batch_generator = GetResult(scanner->ScanBatchesAsync());
+  return state;
+}
+
+/// Store a NULL of the vector's own logical type at `row`.
+inline void SetNull(::duckdb::Vector *out, int64_t row) {
+  out->SetValue(row, ::duckdb::Value(out->GetType()));
+}
+
+/// Convert numeric array to duckdb vector.
+///
+/// Values are copied one by one; null slots become NULL instead of exposing whatever
+/// bytes sit in the Arrow value buffer.
+template <typename ArrowType>
+void ToVector(const std::shared_ptr<::arrow::Array> &arr, ::duckdb::Vector *out) {
+  // TODO: dynamic_pointer_cast does not work here, IDK why.
+  auto array = std::static_pointer_cast<typename ::arrow::TypeTraits<ArrowType>::ArrayType>(arr);
+  assert(array != nullptr);
+  // TODO: How to use zero copy to move data from arrow to duckdb.
+  for (int64_t i = 0; i < array->length(); ++i) {
+    if (array->IsNull(i)) {
+      SetNull(out, i);
+      continue;
+    }
+    out->SetValue(i, ::duckdb::Value::CreateValue(array->Value(i)));
+  }
+  out->SetVectorType(::duckdb::VectorType::FLAT_VECTOR);
+}
+
+/// Convert a String array into duckdb vector.
+template <>
+void ToVector<::arrow::StringType>(const std::shared_ptr<::arrow::Array> &arr,
+                                   ::duckdb::Vector *out) {
+  auto array = std::static_pointer_cast<::arrow::StringArray>(arr);
+  assert(array != nullptr);
+  // TODO: How to use zero copy to move data from arrow to duckdb.
+  for (int64_t i = 0; i < array->length(); ++i) {
+    if (array->IsNull(i)) {
+      SetNull(out, i);
+      continue;
+    }
+    out->SetValue(i, std::string(array->Value(i)));
+  }
+  out->SetVectorType(::duckdb::VectorType::FLAT_VECTOR);
+}
+
+/// Convert a Binary array into duckdb vector.
+template <>
+void ToVector<::arrow::BinaryType>(const std::shared_ptr<::arrow::Array> &arr,
+                                   ::duckdb::Vector *out) {
+  auto array = std::static_pointer_cast<::arrow::BinaryArray>(arr);
+  assert(array != nullptr);
+  // TODO: How to use zero copy to move data from arrow to duckdb.
+  for (int64_t i = 0; i < array->length(); ++i) {
+    if (array->IsNull(i)) {
+      SetNull(out, i);
+      continue;
+    }
+    auto val = array->Value(i);
+    out->SetValue(i,
+                  ::duckdb::Value::BLOB(reinterpret_cast<::duckdb::const_data_ptr_t>(val.data()),
+                                        val.size()));
+  }
+  out->SetVectorType(::duckdb::VectorType::FLAT_VECTOR);
+}
+
+/// Convert a dictionary-encoded string array into duckdb vector.
+///
+/// Indices are read through `DictionaryArray::GetValueIndex`, so any index width works.
+///
+/// \throws ::duckdb::IOException if the dictionary values are not a string array.
+template <>
+void ToVector<::arrow::DictionaryType>(const std::shared_ptr<::arrow::Array> &arr,
+                                       ::duckdb::Vector *out) {
+  auto array = std::static_pointer_cast<::arrow::DictionaryArray>(arr);
+  // TODO: zero copy
+  out->SetVectorType(::duckdb::VectorType::FLAT_VECTOR);
+  auto dict_arr = std::dynamic_pointer_cast<::arrow::StringArray>(array->dictionary());
+  if (dict_arr == nullptr) {
+    throw ::duckdb::IOException("Unsupported Arrow Type: " + arr->type()->ToString());
+  }
+  for (int64_t i = 0; i < array->length(); ++i) {
+    if (array->IsNull(i)) {
+      SetNull(out, i);
+      continue;
+    }
+    auto idx = array->GetValueIndex(i);
+    out->SetValue(i, std::string(dict_arr->Value(idx)));
+  }
+}
+
+/// Convert `arrow::Array` to duckdb Struct Vector.
+///
+/// Each Arrow child array is converted into the matching child of `out`.
+/// NOTE(review): struct-level null slots are not propagated; only the children's own
+/// nulls are.
+///
+/// \throws ::duckdb::InvalidInputException if the number of children differs.
+template <>
+void ToVector<::arrow::StructType>(const std::shared_ptr<::arrow::Array> &arr,
+                                   ::duckdb::Vector *out) {
+  assert(arr->type_id() == ::arrow::Type::STRUCT);
+  auto struct_arr = std::static_pointer_cast<::arrow::StructArray>(arr);
+  auto &vector_children = ::duckdb::StructVector::GetEntries(*out);
+
+  // Sanity checks
+  if (static_cast<size_t>(struct_arr->num_fields()) != vector_children.size()) {
+    throw ::duckdb::InvalidInputException("Struct fields are not expected: %lu != %lu",
+                                          struct_arr->num_fields(),
+                                          vector_children.size());
+  }
+
+  for (int i = 0; i < struct_arr->num_fields(); i++) {
+    ArrowArrayToVector(struct_arr->field(i), vector_children[i].get());
+  }
+}
+
+/// Copy a list-like Arrow array (`ListArray` / `FixedSizeListArray`) into a LIST vector.
+///
+/// Each row's child slice is converted into a temporary vector sized for that slice and
+/// then stored in `out` as a LIST value. Null rows become NULL and empty rows become an
+/// empty list of the child type.
+template <typename ListArrayType>
+void ListLikeToVector(const std::shared_ptr<::arrow::Array> &arr, ::duckdb::Vector *out) {
+  /// TODO: zero copy vector construction.
+  auto list_arr = std::static_pointer_cast<ListArrayType>(arr);
+  for (int64_t i = 0; i < list_arr->length(); ++i) {
+    if (list_arr->IsNull(i)) {
+      SetNull(out, i);
+      continue;
+    }
+    auto elements = list_arr->value_slice(i);
+    auto child_type = ToLogicalType(*elements->type());
+    if (elements->length() == 0) {
+      out->SetValue(i, ::duckdb::Value::EMPTYLIST(child_type));
+      continue;
+    }
+    // Size the scratch vector for this row so long lists cannot overrun it.
+    ::duckdb::Vector elem_vector(child_type, elements->length());
+    ArrowArrayToVector(elements, &elem_vector);
+    std::vector<::duckdb::Value> values;
+    values.reserve(elements->length());
+    for (int64_t j = 0; j < elements->length(); ++j) {
+      values.emplace_back(elem_vector.GetValue(j));
+    }
+    out->SetValue(i, ::duckdb::Value::LIST(std::move(values)));
+  }
+}
+
+/// Convert a List array into duckdb vector.
+template <>
+void ToVector<::arrow::ListType>(const std::shared_ptr<::arrow::Array> &arr,
+                                 ::duckdb::Vector *out) {
+  assert(arr->type_id() == ::arrow::Type::LIST);
+  ListLikeToVector<::arrow::ListArray>(arr, out);
+}
+
+/// Convert a FixedSizeList array into duckdb vector.
+template <>
+void ToVector<::arrow::FixedSizeListType>(const std::shared_ptr<::arrow::Array> &arr,
+                                          ::duckdb::Vector *out) {
+  assert(arr->type_id() == ::arrow::Type::FIXED_SIZE_LIST);
+  ListLikeToVector<::arrow::FixedSizeListArray>(arr, out);
+}
+
+/// Convert a `arrow::Array` to `duckdb::Vector`.
+void ArrowArrayToVector(const std::shared_ptr<::arrow::Array> &arr, ::duckdb::Vector *out) {
+  // Dispatch on the Arrow type id; each case selects the converter for that type.
+  switch (arr->type_id()) {
+    case ::arrow::Type::BOOL:
+      ToVector<::arrow::BooleanType>(arr, out);
+      break;
+    case ::arrow::Type::UINT8:
+      ToVector<::arrow::UInt8Type>(arr, out);
+      break;
+    case ::arrow::Type::INT8:
+      ToVector<::arrow::Int8Type>(arr, out);
+      break;
+    case ::arrow::Type::UINT16:
+      ToVector<::arrow::UInt16Type>(arr, out);
+      break;
+    case ::arrow::Type::INT16:
+      ToVector<::arrow::Int16Type>(arr, out);
+      break;
+    case ::arrow::Type::UINT32:
+      ToVector<::arrow::UInt32Type>(arr, out);
+      break;
+    case ::arrow::Type::INT32:
+      ToVector<::arrow::Int32Type>(arr, out);
+      break;
+    case ::arrow::Type::UINT64:
+      ToVector<::arrow::UInt64Type>(arr, out);
+      break;
+    case ::arrow::Type::INT64:
+      ToVector<::arrow::Int64Type>(arr, out);
+      break;
+    case ::arrow::Type::FLOAT:
+      ToVector<::arrow::FloatType>(arr, out);
+      break;
+    case ::arrow::Type::DOUBLE:
+      // A double array must go through the double converter; the float converter would
+      // reinterpret the array as `FloatArray`.
+      ToVector<::arrow::DoubleType>(arr, out);
+      break;
+    case ::arrow::Type::STRING:
+      ToVector<::arrow::StringType>(arr, out);
+      break;
+    case ::arrow::Type::BINARY:
+      ToVector<::arrow::BinaryType>(arr, out);
+      break;
+    case ::arrow::Type::DICTIONARY:
+      ToVector<::arrow::DictionaryType>(arr, out);
+      break;
+    case ::arrow::Type::STRUCT:
+      ToVector<::arrow::StructType>(arr, out);
+      break;
+    case ::arrow::Type::LIST:
+      ToVector<::arrow::ListType>(arr, out);
+      break;
+    case ::arrow::Type::FIXED_SIZE_LIST:
+      ToVector<::arrow::FixedSizeListType>(arr, out);
+      break;
+    default:
+      throw ::duckdb::IOException("Unsupported Arrow Type: " + arr->type()->ToString());
+  }
+}
+
+/// Scan callback: pull the next record batch and copy its columns into `output`.
+///
+/// Leaves `output` untouched when the generator yields no batch, which ends the scan.
+/// NOTE(review): the whole batch is copied into `output`; this presumably requires
+/// batches no larger than the chunk's capacity -- confirm the scanner's batch size.
+///
+/// \throws ::duckdb::InternalException if the global state is not a `GlobalScanState`.
+void LanceScan(::duckdb::ClientContext &context,
+               ::duckdb::TableFunctionInput &input,
+               ::duckdb::DataChunk &output) {
+  auto global_state = dynamic_cast<GlobalScanState *>(input.global_state);
+  if (global_state == nullptr) {
+    throw ::duckdb::InternalException("lance_scan: unexpected global state");
+  }
+  auto fut = global_state->batch_generator();
+  auto batch = GetResult(fut.MoveResult());
+  if (batch.record_batch == nullptr) {
+    return;
+  }
+  output.SetCardinality(batch.record_batch->num_rows());
+  const auto num_columns = static_cast<int>(output.data.size());
+  for (int i = 0; i < num_columns; ++i) {
+    auto col = batch.record_batch->column(i);
+    ArrowArrayToVector(col, &output.data[i]);
+  }
+}
+
+}  // namespace
+
+/// Build the `lance_scan` table function set: one function taking a single VARCHAR.
+::duckdb::TableFunctionSet GetLanceReaderFunction() {
+  ::duckdb::TableFunctionSet func_set("lance_scan");
+
+  ::duckdb::TableFunction table_function(
+      {::duckdb::LogicalType::VARCHAR}, LanceScan, LanceScanBind, InitGlobal);
+  table_function.projection_pushdown = true;
+  // The scan never applies filters itself, so it must not advertise filter pushdown or
+  // filter-column pruning; DuckDB then keeps evaluating WHERE clauses on its side.
+  table_function.filter_pushdown = false;
+  table_function.filter_prune = false;
+
+  func_set.AddFunction(table_function);
+  return func_set;
+}
+
+/// Replacement scan: rewrite a table name ending in ".lance" into `lance_scan(<name>)`.
+///
+/// The suffix is matched case-insensitively, while the original `table_name` is passed
+/// to `lance_scan` unchanged.
+///
+/// \return the table function reference, or nullptr when the name does not match.
+std::unique_ptr<::duckdb::TableFunctionRef> LanceScanReplacement(
+    ::duckdb::ClientContext &context,
+    const ::std::string &table_name,
+    ::duckdb::ReplacementScanData *data) {
+  auto lower_name = ::duckdb::StringUtil::Lower(table_name);
+  if (!::duckdb::StringUtil::EndsWith(lower_name, ".lance")) {
+    return nullptr;
+  }
+  auto table_function = ::duckdb::make_unique<::duckdb::TableFunctionRef>();
+  ::std::vector<::std::unique_ptr<::duckdb::ParsedExpression>> children;
+  children.emplace_back(
+      ::std::make_unique<::duckdb::ConstantExpression>(::duckdb::Value(table_name)));
+  table_function->function =
+      ::std::make_unique<::duckdb::FunctionExpression>("lance_scan", ::std::move(children));
+  return table_function;
+}
+
+}  // namespace lance::duckdb
\ No newline at end of file
diff --git a/integration/duckdb/src/lance/duckdb/lance_reader.h b/integration/duckdb/src/lance/duckdb/lance_reader.h
new file mode 100644
index 0000000000..f3a6f4d00b
--- /dev/null
+++ b/integration/duckdb/src/lance/duckdb/lance_reader.h
@@ -0,0 +1,35 @@
+// Copyright 2022 Lance Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#pragma once
+
+#include
+#include
+#include
+#include
+
+namespace lance::duckdb {
+
+/// Get the `lance_scan` table function set. The function takes one VARCHAR argument,
+/// the dataset URI or path:
+///
+///   SELECT * from lance_scan('s3://path/to/dataset');
+::duckdb::TableFunctionSet GetLanceReaderFunction();
+
+/// Replacement scan for names ending in ".lance" (matched case-insensitively).
+///
+/// Returns a table function reference that calls `lance_scan` with `table_name`, or
+/// nullptr when the name does not end in ".lance".
+std::unique_ptr<::duckdb::TableFunctionRef> LanceScanReplacement(
+    ::duckdb::ClientContext &context,
+    const ::std::string &table_name,
+    ::duckdb::ReplacementScanData *data);
+
+}  // namespace lance::duckdb