diff --git a/.github/workflows/duckdb.yml b/.github/workflows/duckdb.yml deleted file mode 100644 index a215ae84a6..0000000000 --- a/.github/workflows/duckdb.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: DuckDB Extension -on: - push: - branches: - - main - pull_request: - paths: - - integration/duckdb_lance/* - - .github/workflows/duckdb.yml - - ./rust/* - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - Linux: - runs-on: ubuntu-22.04 - timeout-minutes: 45 - defaults: - run: - working-directory: ./integration/duckdb_lance - steps: - - uses: actions/checkout@v4 - - name: Install dependencies - run: | - sudo apt update - sudo apt install -y protobuf-compiler libssl-dev - - name: Checkout submodules - run: | - git submodule init - git submodule update - - name: Make - run: make build - # - name: Upload Lance duckdb extension - # uses: actions/upload-artifact@v3 - # with: - # name: duckdb-ubuntu-extension - # path: integration/duckdb/build/lance.duckdb_extension - # retention-days: 1 - MacOS: - runs-on: macos-14 - timeout-minutes: 40 - defaults: - run: - working-directory: ./integration/duckdb_lance - steps: - - uses: actions/checkout@v4 - - name: Install dependencies - run: | - brew install protobuf - - name: Checkout submodules - run: | - git submodule init - git submodule update - - name: Build - run: make build - # - name: Upload Lance duckdb extension - # uses: actions/upload-artifact@v3 - # with: - # name: duckdb-intel-mac-extension - # path: integration/duckdb/build/lance.duckdb_extension - # retention-days: 1 - diff --git a/.gitignore b/.gitignore index a70b512b40..e5a0cce12c 100644 --- a/.gitignore +++ b/.gitignore @@ -67,11 +67,6 @@ docs/api/python **/.ipynb_checkpoints/ docs/notebooks - -integration/duckdb/*-build -integration/duckdb/lance.duckdb_extension.*.zip - -notebooks/lance.duckdb_extension notebooks/sift notebooks/image_data/data benchmarks/sift/sift diff --git a/.gitmodules b/.gitmodules index 05d79fc7c9..e69de29bb2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +0,0 @@ -[submodule "integration/duckdb_lance/duckdb"] - path = integration/duckdb_lance/duckdb - url = https://github.com/duckdb/duckdb.git -[submodule "integration/duckdb_lance/duckdb-ext/duckdb"] - path = integration/duckdb_lance/duckdb-ext/duckdb - url = https://github.com/duckdb/duckdb.git diff --git a/docs/contributing.rst b/docs/contributing.rst index a7e6d72206..ec6114c169 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -127,13 +127,6 @@ Example Notebooks Example notebooks are under `examples`. These are standalone notebooks you should be able to download and run. -DuckDB Extension -~~~~~~~~~~~~~~~~ - -In python, Lance integrates with DuckDB via Apache Arrow. Outside of python, the highly experimental duckdb extension for Lance -lives under `integration/duckdb_lance`. This uses the DuckDB `Rust extension framework `_. -The main code lives under `integration/duckdb_lance/src`. Follow the integration README for more details. - Benchmarks ~~~~~~~~~~ diff --git a/integration/duckdb_lance/CMakeLists.txt b/integration/duckdb_lance/CMakeLists.txt deleted file mode 100644 index b3a1e7976f..0000000000 --- a/integration/duckdb_lance/CMakeLists.txt +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2023 Lance Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Still need to use cmake to link to duckdb via `build_loadable_extension` macro. -# - -cmake_minimum_required(VERSION 3.22) - -if (POLICY CMP0135) - cmake_policy(SET CMP0135 NEW) -endif () - -project(lance_duckdb VERSION 0.3) -set(EXTENSION_NAME lance) - -if (APPLE) - # POLICY CMP0042 - set(CMAKE_MACOSX_RPATH 1) -endif() - -include(FetchContent) - -if(UNIX AND NOT APPLE) - find_package(OpenSSL REQUIRED) -endif() - -FetchContent_Declare( - Corrosion - GIT_REPOSITORY https://github.com/corrosion-rs/corrosion.git - GIT_TAG v0.3.2 # Optionally specify a commit hash, version tag or branch here -) -set(BUILD_UNITTESTS FALSE) # Disable unit test build in duckdb - -FetchContent_MakeAvailable(Corrosion) - -#set(EXTERNAL_EXTENSION_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}) - -corrosion_import_crate(MANIFEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Cargo.toml) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/duckdb/src/include) - -set(ALL_SOURCES src/extension.c src/extension.h) - -SET(EXTENSION_STATIC_BUILD 1) -set(PARAMETERS "-warnings") -build_loadable_extension(${EXTENSION_NAME} ${PARAMETERS} ${ALL_SOURCES}) - -set(LIB_NAME ${EXTENSION_NAME}_loadable_extension) - -set_target_properties(${LIB_NAME} PROPERTIES LINKER_LANGUAGE CXX) -target_link_libraries(${LIB_NAME} - "${CMAKE_CURRENT_BINARY_DIR}/libduckdb_lance.a" - duckdb_static - ${OPENSSL_LIBRARIES} -) - -if (APPLE) - target_link_libraries(${LIB_NAME} - "-framework CoreFoundation" - "-framework Security" - "-framework Accelerate") -endif() diff --git a/integration/duckdb_lance/Cargo.toml b/integration/duckdb_lance/Cargo.toml deleted file mode 100644 index e163d5c0dc..0000000000 --- a/integration/duckdb_lance/Cargo.toml +++ /dev/null @@ -1,21 +0,0 @@ -[package] -name = "duckdb-lance" -version = "0.1.0" -edition = "2021" - -[dependencies] -lance = { path = "../../rust/lance" } -duckdb-ext = { path = "./duckdb-ext" } -lazy_static = "1.4.0" -tokio = { version = "1.23", features = ["rt-multi-thread"] } -arrow-schema = "49.0.0" -arrow-array = "49.0.0" -futures = "0.3" -num-traits = "0.2" - -[dev-dependencies] -libduckdb-sys = { version = "0.8.1", features = ["bundled"] } - -[lib] -name = "duckdb_lance" -crate-type = ["staticlib"] diff --git a/integration/duckdb_lance/Makefile b/integration/duckdb_lance/Makefile deleted file mode 100644 index 7c15c9d0d4..0000000000 --- a/integration/duckdb_lance/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -# - -BUILD_FLAGS=-DEXTENSION_STATIC_BUILD=1 -DCLANG_TIDY=False - -# Debug build -build: - mkdir -p build/debug && \ - cd build/debug && \ - cmake $(GENERATOR) $(FORCE_COLOR) -DCMAKE_BUILD_TYPE=Debug ${BUILD_FLAGS} ../../duckdb/CMakeLists.txt -DEXTERNAL_EXTENSION_DIRECTORIES=../../duckdb_lance -B. && \ - cmake --build . --config Debug -.PHONY: build - - -release: - mkdir -p build/release && \ - cd build/release && \ - cmake $(GENERATOR) $(FORCE_COLOR) -DCMAKE_BUILD_TYPE=Release ${BUILD_FLAGS} \ - ../../duckdb/CMakeLists.txt -DEXTERNAL_EXTENSION_DIRECTORIES=../../duckdb_lance -B. && \ - cmake --build . --config Release -.PHONY: release - diff --git a/integration/duckdb_lance/README.md b/integration/duckdb_lance/README.md deleted file mode 100644 index 9fef7377e6..0000000000 --- a/integration/duckdb_lance/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# DuckDB Extension - - -## How to build - -```sh - -git submodule update -make build -``` diff --git a/integration/duckdb_lance/duckdb b/integration/duckdb_lance/duckdb deleted file mode 160000 index f7827396d7..0000000000 --- a/integration/duckdb_lance/duckdb +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f7827396d70232a0434c91142809deef6e0b6092 diff --git a/integration/duckdb_lance/duckdb-ext/Cargo.toml b/integration/duckdb_lance/duckdb-ext/Cargo.toml deleted file mode 100644 index c14b849964..0000000000 --- a/integration/duckdb_lance/duckdb-ext/Cargo.toml +++ /dev/null @@ -1,11 +0,0 @@ -[package] -name = "duckdb-ext" -version = "0.1.0" -edition = "2021" - -[dependencies] - -[build-dependencies] -bindgen = "0.64.0" -build_script = "0.2.0" -cc = "1.0.78" diff --git a/integration/duckdb_lance/duckdb-ext/README.md b/integration/duckdb_lance/duckdb-ext/README.md deleted file mode 100644 index 9f15206cb2..0000000000 --- a/integration/duckdb_lance/duckdb-ext/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# DuckDB Rust Extension Toolkit - - -## Credits - -This library was inspired by [DuckDB Extension Framework](https://github.com/Mause/duckdb-extension-framework). diff --git a/integration/duckdb_lance/duckdb-ext/build.rs b/integration/duckdb_lance/duckdb-ext/build.rs deleted file mode 100644 index 6696365900..0000000000 --- a/integration/duckdb_lance/duckdb-ext/build.rs +++ /dev/null @@ -1,40 +0,0 @@ -use build_script::cargo_rerun_if_changed; -use std::path::PathBuf; -use std::{env, path::Path}; - -fn main() { - let duckdb_root = Path::new(&env::var("CARGO_MANIFEST_DIR").unwrap()) - .join("duckdb") - .canonicalize() - .expect("duckdb source root"); - - let header = "src/duckdb_ext.h"; - - cargo_rerun_if_changed(header); - - let duckdb_include = duckdb_root.join("src/include"); - let bindings = bindgen::Builder::default() - .header(header) - .clang_arg("-xc++") - .clang_arg("-I") - .clang_arg(duckdb_include.to_string_lossy()) - .derive_debug(true) - .derive_default(true) - .parse_callbacks(Box::new(bindgen::CargoCallbacks)) - .generate() - .expect("Unable to generate bindings"); - - let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); - bindings - .write_to_file(out_path.join("bindings.rs")) - .expect("Couldn't write bindings!"); - - cc::Build::new() - .include(duckdb_include) - .flag_if_supported("-Wno-unused-parameter") - .flag_if_supported("-Wno-redundant-move") - .flag_if_supported("-std=c++17") - .cpp(true) - .file("src/duckdb_ext.cc") - .compile("duckdb_ext"); -} diff --git a/integration/duckdb_lance/duckdb-ext/duckdb b/integration/duckdb_lance/duckdb-ext/duckdb deleted file mode 160000 index f7827396d7..0000000000 --- a/integration/duckdb_lance/duckdb-ext/duckdb +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f7827396d70232a0434c91142809deef6e0b6092 diff --git a/integration/duckdb_lance/duckdb-ext/src/connection.rs b/integration/duckdb_lance/duckdb-ext/src/connection.rs deleted file mode 100644 index ae125990fe..0000000000 --- a/integration/duckdb_lance/duckdb-ext/src/connection.rs +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2023 Lance Developers -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use crate::ffi::{duckdb_connection, duckdb_register_table_function}; -use crate::table_function::TableFunction; - -/// A connection to a database. This represents a (client) connection that can -/// be used to query the database. -#[derive(Debug)] -pub struct Connection { - ptr: duckdb_connection, -} - -impl From for Connection { - fn from(ptr: duckdb_connection) -> Self { - Self { ptr } - } -} - -impl Connection { - pub fn register_table_function( - &self, - table_function: TableFunction, - ) -> Result<(), Box> { - unsafe { - duckdb_register_table_function(self.ptr, table_function.ptr); - } - Ok(()) - } -} diff --git a/integration/duckdb_lance/duckdb-ext/src/data_chunk.rs b/integration/duckdb_lance/duckdb-ext/src/data_chunk.rs deleted file mode 100644 index 32194bc50c..0000000000 --- a/integration/duckdb_lance/duckdb-ext/src/data_chunk.rs +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright 2023 Lance Developers -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use super::vector::{FlatVector, ListVector, StructVector}; -use crate::{ - ffi::{ - duckdb_create_data_chunk, duckdb_data_chunk, duckdb_data_chunk_get_size, - duckdb_data_chunk_get_vector, duckdb_data_chunk_set_size, duckdb_destroy_data_chunk, - duckdb_data_chunk_get_column_count, - }, - LogicalType, -}; - -/// DataChunk in DuckDB. -pub struct DataChunk { - /// Pointer to the DataChunk in duckdb C API. - ptr: duckdb_data_chunk, - - /// Whether this [DataChunk] own the [DataChunk::ptr]. - owned: bool, -} - -impl DataChunk { - pub fn new(logical_types: &[LogicalType]) -> Self { - let num_columns = logical_types.len(); - let mut c_types = logical_types.iter().map(|t| t.ptr).collect::>(); - let ptr = unsafe { duckdb_create_data_chunk(c_types.as_mut_ptr(), num_columns as u64) }; - DataChunk { ptr, owned: true } - } - - /// Get the vector at the specific column index: `idx`. - /// - pub fn flat_vector(&self, idx: usize) -> FlatVector { - FlatVector::from(unsafe { duckdb_data_chunk_get_vector(self.ptr, idx as u64) }) - } - - /// Get a list vector from the column index. - pub fn list_vector(&self, idx: usize) -> ListVector { - ListVector::from(unsafe { duckdb_data_chunk_get_vector(self.ptr, idx as u64) }) - } - - /// Get struct vector at the column index: `idx`. - pub fn struct_vector(&self, idx: usize) -> StructVector { - StructVector::from(unsafe { duckdb_data_chunk_get_vector(self.ptr, idx as u64) }) - } - - /// Set the size of the data chunk - pub fn set_len(&self, new_len: usize) { - unsafe { duckdb_data_chunk_set_size(self.ptr, new_len as u64) }; - } - - /// Get the length / the number of rows in this [DataChunk]. - pub fn len(&self) -> usize { - unsafe { duckdb_data_chunk_get_size(self.ptr) as usize } - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn num_columns(&self) -> usize { - unsafe { duckdb_data_chunk_get_column_count(self.ptr) as usize } - } -} - -impl From for DataChunk { - fn from(ptr: duckdb_data_chunk) -> Self { - Self { ptr, owned: false } - } -} - -impl Drop for DataChunk { - fn drop(&mut self) { - if self.owned && !self.ptr.is_null() { - unsafe { duckdb_destroy_data_chunk(&mut self.ptr) } - self.ptr = std::ptr::null_mut(); - } - } -} diff --git a/integration/duckdb_lance/duckdb-ext/src/database.rs b/integration/duckdb_lance/duckdb-ext/src/database.rs deleted file mode 100644 index 41a181aa35..0000000000 --- a/integration/duckdb_lance/duckdb-ext/src/database.rs +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2023 Lance Developers -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use crate::ffi::{duckdb_connect, duckdb_connection, duckdb_database, duckdb_state_DuckDBError}; -use crate::{Connection, Error, Result}; - -pub struct Database { - ptr: duckdb_database, -} - -impl From for Database { - fn from(ptr: duckdb_database) -> Self { - Self { ptr } - } -} - -impl Database { - pub fn connect(&self) -> Result { - let mut connection: duckdb_connection = std::ptr::null_mut(); - - let state = unsafe { duckdb_connect(self.ptr, &mut connection) }; - if state == duckdb_state_DuckDBError { - return Err(Error::DuckDB("Connection error".to_string())); - } - - Ok(Connection::from(connection)) - } -} diff --git a/integration/duckdb_lance/duckdb-ext/src/duckdb_ext.cc b/integration/duckdb_lance/duckdb-ext/src/duckdb_ext.cc deleted file mode 100644 index c2efa5c66c..0000000000 --- a/integration/duckdb_lance/duckdb-ext/src/duckdb_ext.cc +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2023 Lance Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "duckdb_ext.h" - -#include - -#include "duckdb.hpp" - -namespace { - -auto build_child_list(idx_t n_pairs, const char *const *names, duckdb_logical_type const *types) { - duckdb::child_list_t members; - for (idx_t i = 0; i < n_pairs; i++) { - members.emplace_back(std::string(names[i]), *(duckdb::LogicalType *)types[i]); - } - return members; -} - -} // namespace - -extern "C" { - -duckdb_logical_type duckdb_create_struct_type(idx_t n_pairs, - const char **names, - const duckdb_logical_type *types) { - auto *stype = new duckdb::LogicalType; - *stype = duckdb::LogicalType::STRUCT(build_child_list(n_pairs, names, types)); - return reinterpret_cast(stype); -} - -} \ No newline at end of file diff --git a/integration/duckdb_lance/duckdb-ext/src/duckdb_ext.h b/integration/duckdb_lance/duckdb-ext/src/duckdb_ext.h deleted file mode 100644 index d246e483c8..0000000000 --- a/integration/duckdb_lance/duckdb-ext/src/duckdb_ext.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2023 Lance Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#define DUCKDB_BUILD_LOADABLE_EXTENSION -#include "duckdb.h" - -extern "C" { - -DUCKDB_EXTENSION_API duckdb_logical_type duckdb_create_struct_type( - idx_t n_pairs, const char** names, const duckdb_logical_type* types); - -}; diff --git a/integration/duckdb_lance/duckdb-ext/src/error.rs b/integration/duckdb_lance/duckdb-ext/src/error.rs deleted file mode 100644 index d5e8f9de2a..0000000000 --- a/integration/duckdb_lance/duckdb-ext/src/error.rs +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2023 Lance Developers -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::ffi::CString; - -pub enum Error { - IO(String), - DuckDB(String), -} - -pub type Result = std::result::Result; - -impl std::fmt::Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::IO(s) => write!(f, "I/O: {s}"), - Self::DuckDB(s) => write!(f, "I/O: {s}"), - } - } -} - -impl Error { - pub fn c_str(&self) -> CString { - CString::new(self.to_string()).unwrap() - } -} diff --git a/integration/duckdb_lance/duckdb-ext/src/function_info.rs b/integration/duckdb_lance/duckdb-ext/src/function_info.rs deleted file mode 100644 index a81c967a83..0000000000 --- a/integration/duckdb_lance/duckdb-ext/src/function_info.rs +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2023 Lance Developers -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use crate::ffi::{duckdb_function_get_init_data, duckdb_function_info, duckdb_function_set_error}; -use crate::Error; - -/// UDF -pub struct FunctionInfo { - ptr: duckdb_function_info, -} - -impl From for FunctionInfo { - fn from(ptr: duckdb_function_info) -> Self { - Self { ptr } - } -} - -impl FunctionInfo { - pub fn init_data(&self) -> *mut T { - unsafe { duckdb_function_get_init_data(self.ptr).cast() } - } - - pub fn set_error(&self, error: Error) { - unsafe { - duckdb_function_set_error(self.ptr, error.c_str().as_ptr()); - } - } -} diff --git a/integration/duckdb_lance/duckdb-ext/src/lib.rs b/integration/duckdb_lance/duckdb-ext/src/lib.rs deleted file mode 100644 index 8cc7597a69..0000000000 --- a/integration/duckdb_lance/duckdb-ext/src/lib.rs +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2023 Lance Developers -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -mod connection; -mod data_chunk; -mod database; -mod error; -mod function_info; -mod logical_type; -pub mod table_function; -mod value; -mod vector; - -pub use connection::Connection; -pub use data_chunk::DataChunk; -pub use database::Database; -pub use error::{Error, Result}; -pub use function_info::FunctionInfo; -pub use logical_type::{LogicalType, LogicalTypeId}; -pub use value::Value; -pub use vector::{FlatVector, Inserter, ListVector, StructVector, Vector}; - -#[allow(clippy::all)] -pub mod ffi { - #![allow(non_upper_case_globals)] - #![allow(non_camel_case_types)] - #![allow(non_snake_case)] - #![allow(unused)] - #![allow(improper_ctypes)] - #![allow(clippy::upper_case_acronyms)] - include!(concat!(env!("OUT_DIR"), "/bindings.rs")); -} diff --git a/integration/duckdb_lance/duckdb-ext/src/logical_type.rs b/integration/duckdb_lance/duckdb-ext/src/logical_type.rs deleted file mode 100644 index 921f273b16..0000000000 --- a/integration/duckdb_lance/duckdb-ext/src/logical_type.rs +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright 2023 Lance Developers -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::ffi::{c_char, CString}; -use std::fmt::Debug; - -use crate::ffi::*; - -#[repr(u32)] -#[derive(Debug, PartialEq, Eq)] -pub enum LogicalTypeId { - Boolean = DUCKDB_TYPE_DUCKDB_TYPE_BOOLEAN, - Tinyint = DUCKDB_TYPE_DUCKDB_TYPE_TINYINT, - Smallint = DUCKDB_TYPE_DUCKDB_TYPE_SMALLINT, - Integer = DUCKDB_TYPE_DUCKDB_TYPE_INTEGER, - Bigint = DUCKDB_TYPE_DUCKDB_TYPE_BIGINT, - UTinyint = DUCKDB_TYPE_DUCKDB_TYPE_UTINYINT, - USmallint = DUCKDB_TYPE_DUCKDB_TYPE_USMALLINT, - UInteger = DUCKDB_TYPE_DUCKDB_TYPE_UINTEGER, - UBigint = DUCKDB_TYPE_DUCKDB_TYPE_UBIGINT, - Float = DUCKDB_TYPE_DUCKDB_TYPE_FLOAT, - Double = DUCKDB_TYPE_DUCKDB_TYPE_DOUBLE, - Timestamp = DUCKDB_TYPE_DUCKDB_TYPE_TIMESTAMP, - Date = DUCKDB_TYPE_DUCKDB_TYPE_DATE, - Time = DUCKDB_TYPE_DUCKDB_TYPE_TIME, - Interval = DUCKDB_TYPE_DUCKDB_TYPE_INTERVAL, - Hugeint = DUCKDB_TYPE_DUCKDB_TYPE_HUGEINT, - Varchar = DUCKDB_TYPE_DUCKDB_TYPE_VARCHAR, - Blob = DUCKDB_TYPE_DUCKDB_TYPE_BLOB, - Decimal = DUCKDB_TYPE_DUCKDB_TYPE_DECIMAL, - TimestampS = DUCKDB_TYPE_DUCKDB_TYPE_TIMESTAMP_S, - TimestampMs = DUCKDB_TYPE_DUCKDB_TYPE_TIMESTAMP_MS, - TimestampNs = DUCKDB_TYPE_DUCKDB_TYPE_TIMESTAMP_NS, - Enum = DUCKDB_TYPE_DUCKDB_TYPE_ENUM, - List = DUCKDB_TYPE_DUCKDB_TYPE_LIST, - Struct = DUCKDB_TYPE_DUCKDB_TYPE_STRUCT, - Map = DUCKDB_TYPE_DUCKDB_TYPE_MAP, - Uuid = DUCKDB_TYPE_DUCKDB_TYPE_UUID, - Union = DUCKDB_TYPE_DUCKDB_TYPE_UNION, -} - -impl From for LogicalTypeId { - fn from(value: u32) -> Self { - match value { - DUCKDB_TYPE_DUCKDB_TYPE_BOOLEAN => Self::Boolean, - DUCKDB_TYPE_DUCKDB_TYPE_TINYINT => Self::Tinyint, - DUCKDB_TYPE_DUCKDB_TYPE_SMALLINT => Self::Smallint, - DUCKDB_TYPE_DUCKDB_TYPE_INTEGER => Self::Integer, - DUCKDB_TYPE_DUCKDB_TYPE_BIGINT => Self::Bigint, - DUCKDB_TYPE_DUCKDB_TYPE_UTINYINT => Self::UTinyint, - DUCKDB_TYPE_DUCKDB_TYPE_USMALLINT => Self::USmallint, - DUCKDB_TYPE_DUCKDB_TYPE_UINTEGER => Self::UInteger, - DUCKDB_TYPE_DUCKDB_TYPE_UBIGINT => Self::UBigint, - DUCKDB_TYPE_DUCKDB_TYPE_FLOAT => Self::Float, - DUCKDB_TYPE_DUCKDB_TYPE_DOUBLE => Self::Double, - DUCKDB_TYPE_DUCKDB_TYPE_VARCHAR => Self::Varchar, - DUCKDB_TYPE_DUCKDB_TYPE_BLOB => Self::Blob, - DUCKDB_TYPE_DUCKDB_TYPE_TIMESTAMP => Self::Timestamp, - DUCKDB_TYPE_DUCKDB_TYPE_DATE => Self::Date, - DUCKDB_TYPE_DUCKDB_TYPE_TIME => Self::Time, - DUCKDB_TYPE_DUCKDB_TYPE_INTERVAL => Self::Interval, - DUCKDB_TYPE_DUCKDB_TYPE_HUGEINT => Self::Hugeint, - DUCKDB_TYPE_DUCKDB_TYPE_DECIMAL => Self::Decimal, - DUCKDB_TYPE_DUCKDB_TYPE_TIMESTAMP_S => Self::TimestampS, - DUCKDB_TYPE_DUCKDB_TYPE_TIMESTAMP_MS => Self::TimestampMs, - DUCKDB_TYPE_DUCKDB_TYPE_TIMESTAMP_NS => Self::TimestampNs, - DUCKDB_TYPE_DUCKDB_TYPE_ENUM => Self::Enum, - DUCKDB_TYPE_DUCKDB_TYPE_LIST => Self::List, - DUCKDB_TYPE_DUCKDB_TYPE_STRUCT => Self::Struct, - DUCKDB_TYPE_DUCKDB_TYPE_MAP => Self::Map, - DUCKDB_TYPE_DUCKDB_TYPE_UUID => Self::Uuid, - DUCKDB_TYPE_DUCKDB_TYPE_UNION => Self::Union, - _ => panic!(), - } - } -} - -/// DuckDB Logical Type. -/// -/// https://duckdb.org/docs/sql/data_types/overview -pub struct LogicalType { - pub(crate) ptr: duckdb_logical_type, -} - -impl Debug for LogicalType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { - let id = self.id(); - match id { - LogicalTypeId::Struct => { - write!(f, "struct<")?; - for i in 0..self.num_children() { - if i > 0 { - write!(f, ", ")?; - } - write!(f, "{}: {:?}", self.child_name(i), self.child(i))?; - } - write!(f, ">") - } - _ => write!(f, "{:?}", self.id()), - } - } -} - -impl Drop for LogicalType { - fn drop(&mut self) { - if !self.ptr.is_null() { - unsafe { - duckdb_destroy_logical_type(&mut self.ptr); - } - } - - self.ptr = std::ptr::null_mut(); - } -} - -/// Wrap a DuckDB logical type from C API -impl From for LogicalType { - fn from(ptr: duckdb_logical_type) -> Self { - Self { ptr } - } -} - -impl LogicalType { - /// Create a new [LogicalType] from [LogicalTypeId] - pub fn new(id: LogicalTypeId) -> Self { - unsafe { - Self { - ptr: duckdb_create_logical_type(id as u32), - } - } - } - - /// Creates a list type from its child type. - /// - pub fn list_type(child_type: &LogicalType) -> Self { - unsafe { - Self { - ptr: duckdb_create_list_type(child_type.ptr), - } - } - } - - /// Make a `LogicalType` for `struct` - /// - pub fn struct_type(fields: &[(&str, LogicalType)]) -> Self { - let keys: Vec = fields.iter().map(|f| CString::new(f.0).unwrap()).collect(); - let values: Vec = fields.iter().map(|it| it.1.ptr).collect(); - let name_ptrs = keys - .iter() - .map(|it| it.as_ptr()) - .collect::>(); - - unsafe { - Self { - ptr: duckdb_create_struct_type( - fields.len() as idx_t, - name_ptrs.as_slice().as_ptr().cast_mut(), - values.as_slice().as_ptr(), - ), - } - } - } - - /// Logical type ID - pub fn id(&self) -> LogicalTypeId { - let duckdb_type_id = unsafe { duckdb_get_type_id(self.ptr) }; - duckdb_type_id.into() - } - - pub fn num_children(&self) -> usize { - match self.id() { - LogicalTypeId::Struct => unsafe { duckdb_struct_type_child_count(self.ptr) as usize }, - LogicalTypeId::List => 1, - _ => 0, - } - } - - pub fn child_name(&self, idx: usize) -> String { - assert_eq!(self.id(), LogicalTypeId::Struct); - unsafe { - let child_name_ptr = duckdb_struct_type_child_name(self.ptr, idx as u64); - let c_str = CString::from_raw(child_name_ptr); - let name = c_str.to_str().unwrap(); - name.to_string() - } - } - - pub fn child(&self, idx: usize) -> Self { - let c_logical_type = unsafe { duckdb_struct_type_child_type(self.ptr, idx as u64) }; - Self::from(c_logical_type) - } -} diff --git a/integration/duckdb_lance/duckdb-ext/src/table_function.rs b/integration/duckdb_lance/duckdb-ext/src/table_function.rs deleted file mode 100644 index cf2b8a0b57..0000000000 --- a/integration/duckdb_lance/duckdb-ext/src/table_function.rs +++ /dev/null @@ -1,226 +0,0 @@ -// Copyright 2023 Lance Developers -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::ffi::{c_void, CString}; - -use crate::ffi::{ - duckdb_bind_add_result_column, duckdb_bind_get_parameter, duckdb_bind_get_parameter_count, - duckdb_bind_info, duckdb_bind_set_bind_data, duckdb_bind_set_cardinality, - duckdb_bind_set_error, duckdb_create_table_function, duckdb_delete_callback_t, - duckdb_destroy_table_function, duckdb_init_get_bind_data, duckdb_init_info, - duckdb_init_set_error, duckdb_init_set_init_data, duckdb_table_function, - duckdb_table_function_add_parameter, duckdb_table_function_bind_t, - duckdb_table_function_init_t, duckdb_table_function_set_bind, - duckdb_table_function_set_function, duckdb_table_function_set_init, - duckdb_table_function_set_name, duckdb_table_function_supports_projection_pushdown, - duckdb_table_function_t, duckdb_init_get_column_count, duckdb_init_get_column_index, -}; -use crate::{Error, LogicalType, Value}; - -/// DuckDB BindInfo. -pub struct BindInfo { - ptr: duckdb_bind_info, -} - -impl From for BindInfo { - fn from(ptr: duckdb_bind_info) -> Self { - Self { ptr } - } -} - -impl BindInfo { - /// Add a result column to the output of the table function. - /// - /// - `name`: The name of the column - /// - `logical_type`: The [LogicalType] of the new column. - /// - /// # Safety - pub fn add_result_column(&self, name: &str, logical_type: LogicalType) { - let c_string = CString::new(name).unwrap(); - unsafe { - duckdb_bind_add_result_column(self.ptr, c_string.as_ptr(), logical_type.ptr); - } - } - - /// Sets the user-provided bind data in the bind object. This object can be retrieved again during execution. - /// - /// # Arguments - /// * `extra_data`: The bind data object. - /// * `destroy`: The callback that will be called to destroy the bind data (if any) - /// - /// # Safety - /// - pub fn set_bind_data( - &self, - data: *mut c_void, - free_function: Option, - ) { - unsafe { - duckdb_bind_set_bind_data(self.ptr, data, free_function); - } - } - - /// Get the number of regular (non-named) parameters to the function. - pub fn num_parameters(&self) -> u64 { - unsafe { duckdb_bind_get_parameter_count(self.ptr) } - } - - /// Get the parameter at the given index. - /// - /// # Arguments - /// * `index`: The index of the parameter to get - /// - /// returns: The value of the parameter - pub fn parameter(&self, index: usize) -> Value { - unsafe { Value::from(duckdb_bind_get_parameter(self.ptr, index as u64)) } - } - - /// Sets the cardinality estimate for the table function, used for optimization. - /// - /// * `cardinality`: The cardinality estimate - /// * `is_exact`: Whether or not the cardinality estimate is exact, or an approximation - pub fn set_cardinality(&self, cardinality: usize, is_exact: bool) { - unsafe { duckdb_bind_set_cardinality(self.ptr, cardinality as u64, is_exact) } - } - - pub fn set_error(&self, error: Error) { - unsafe { - duckdb_bind_set_error(self.ptr, error.c_str().as_ptr()); - } - } -} - -#[derive(Debug)] -pub struct InitInfo { - ptr: duckdb_init_info, -} - -impl From for InitInfo { - fn from(ptr: duckdb_init_info) -> Self { - Self { ptr } - } -} - -impl InitInfo { - /// # Safety - pub fn set_init_data(&self, data: *mut c_void, freeer: duckdb_delete_callback_t) { - unsafe { - duckdb_init_set_init_data(self.ptr, data, freeer); - } - } - - pub fn bind_data(&self) -> *mut T { - unsafe { duckdb_init_get_bind_data(self.ptr).cast() } - } - - /// Report that an error has occurred while calling init. - /// - /// # Arguments - /// * `error`: The error message - pub fn set_error(&self, error: Error) { - unsafe { duckdb_init_set_error(self.ptr, error.c_str().as_ptr()) } - } - - /// Get the total number of columns to be projected. - pub fn projected_column_ids(&self) -> Vec { - let num_columns = unsafe { duckdb_init_get_column_count(self.ptr) as usize }; - (0..num_columns).map(|col_id| { - unsafe { duckdb_init_get_column_index(self.ptr, col_id as u64) as usize} - }).collect() - } -} - -/// A function that returns a queryable table -#[derive(Debug)] -pub struct TableFunction { - pub(crate) ptr: duckdb_table_function, -} - -impl Drop for TableFunction { - fn drop(&mut self) { - if !self.ptr.is_null() { - unsafe { - duckdb_destroy_table_function(&mut self.ptr); - } - } - self.ptr = std::ptr::null_mut(); - } -} - -impl TableFunction { - /// Creates a new empty table function. - pub fn new(name: &str) -> Self { - let this = Self { - ptr: unsafe { duckdb_create_table_function() }, - }; - this.set_name(name); - this - } - - pub fn set_name(&self, name: &str) -> &Self { - unsafe { - let string = CString::new(name).unwrap(); - duckdb_table_function_set_name(self.ptr, string.as_ptr()); - } - self - } - - /// Adds a parameter to the table function. - /// - pub fn add_parameter(&self, logical_type: &LogicalType) -> &Self { - unsafe { - duckdb_table_function_add_parameter(self.ptr, logical_type.ptr); - } - self - } - - /// Enable project pushdown. - pub fn pushdown(&self, supports: bool) -> &Self { - unsafe { - duckdb_table_function_supports_projection_pushdown(self.ptr, supports); - } - self - } - - /// Sets the main function of the table function - /// - pub fn set_function(&self, func: duckdb_table_function_t) -> &Self { - unsafe { - duckdb_table_function_set_function(self.ptr, func); - } - self - } - - /// Sets the init function of the table function - /// - /// # Arguments - /// * `function`: The init function - pub fn set_init(&self, init_func: duckdb_table_function_init_t) -> &Self { - unsafe { - duckdb_table_function_set_init(self.ptr, init_func); - } - self - } - - /// Sets the bind function of the table function - /// - /// # Arguments - /// * `bind_func`: The bind function - pub fn set_bind(&self, bind_func: duckdb_table_function_bind_t) -> &Self { - unsafe { - duckdb_table_function_set_bind(self.ptr, bind_func); - } - self - } -} diff --git a/integration/duckdb_lance/duckdb-ext/src/value.rs b/integration/duckdb_lance/duckdb-ext/src/value.rs deleted file mode 100644 index 04728f869a..0000000000 --- a/integration/duckdb_lance/duckdb-ext/src/value.rs +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2023 Lance Developers -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use crate::ffi::{duckdb_destroy_value, duckdb_get_varchar, duckdb_value}; -use std::ffi::CString; - -/// The Value object holds a single arbitrary value of any type that can be -/// stored in the database. -#[derive(Debug)] -pub struct Value { - pub(crate) ptr: duckdb_value, -} - -impl From for Value { - fn from(ptr: duckdb_value) -> Self { - Self { ptr } - } -} - -impl Drop for Value { - fn drop(&mut self) { - if !self.ptr.is_null() { - unsafe { - duckdb_destroy_value(&mut self.ptr); - } - } - self.ptr = std::ptr::null_mut(); - } -} - -impl Value { - pub fn to_string(&self) -> String { - let c_string = unsafe { CString::from_raw(duckdb_get_varchar(self.ptr)) }; - c_string.into_string().unwrap() - } -} diff --git a/integration/duckdb_lance/duckdb-ext/src/vector.rs b/integration/duckdb_lance/duckdb-ext/src/vector.rs deleted file mode 100644 index 1f40300e5c..0000000000 --- a/integration/duckdb_lance/duckdb-ext/src/vector.rs +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright 2023 Lance Developers -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::any::Any; -use std::ffi::CString; -use std::slice; - -use crate::ffi::{ - duckdb_list_entry, duckdb_list_vector_get_child, duckdb_list_vector_get_size, - duckdb_list_vector_reserve, duckdb_list_vector_set_size, duckdb_struct_type_child_count, - duckdb_struct_type_child_name, duckdb_struct_vector_get_child, duckdb_vector, - duckdb_vector_assign_string_element, duckdb_vector_get_column_type, duckdb_vector_get_data, - duckdb_vector_size, -}; -use crate::LogicalType; - -/// Vector trait. -pub trait Vector { - fn as_any(&self) -> &dyn Any; - - fn as_mut_any(&mut self) -> &mut dyn Any; -} - -pub struct FlatVector { - ptr: duckdb_vector, - capacity: usize, -} - -impl From for FlatVector { - fn from(ptr: duckdb_vector) -> Self { - Self { - ptr, - capacity: unsafe { duckdb_vector_size() as usize }, - } - } -} - -impl Vector for FlatVector { - fn as_any(&self) -> &dyn Any { - self - } - - fn as_mut_any(&mut self) -> &mut dyn Any { - self - } -} - -impl FlatVector { - fn with_capacity(ptr: duckdb_vector, capacity: usize) -> Self { - Self { ptr, capacity } - } - - pub fn capacity(&self) -> usize { - self.capacity - } - - /// Returns an unsafe mutable pointer to the vector’s - pub fn as_mut_ptr(&self) -> *mut T { - unsafe { duckdb_vector_get_data(self.ptr).cast() } - } - - pub fn as_slice(&self) -> &[T] { - unsafe { slice::from_raw_parts(self.as_mut_ptr(), self.capacity()) } - } - - pub fn as_mut_slice(&mut self) -> &mut [T] { - unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), self.capacity()) } - } - - pub fn logical_type(&self) -> LogicalType { - LogicalType::from(unsafe { duckdb_vector_get_column_type(self.ptr) }) - } - - pub fn copy(&mut self, data: &[T]) { - assert!(data.len() <= self.capacity()); - self.as_mut_slice::()[0..data.len()].copy_from_slice(data); - } -} - -pub trait Inserter { - fn insert(&self, index: usize, value: T); -} - -impl Inserter<&str> for FlatVector { - fn insert(&self, index: usize, value: &str) { - let cstr = CString::new(value.as_bytes()).unwrap(); - unsafe { - duckdb_vector_assign_string_element(self.ptr, index as u64, cstr.as_ptr()); - } - } -} - -pub struct ListVector { - /// ListVector does not own the vector pointer. - entries: FlatVector, -} - -impl From for ListVector { - fn from(ptr: duckdb_vector) -> Self { - Self { - entries: FlatVector::from(ptr), - } - } -} - -impl ListVector { - pub fn len(&self) -> usize { - unsafe { duckdb_list_vector_get_size(self.entries.ptr) as usize } - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - // TODO: not ideal interface. Where should we keep capacity. - pub fn child(&self, capacity: usize) -> FlatVector { - self.reserve(capacity); - FlatVector::with_capacity( - unsafe { duckdb_list_vector_get_child(self.entries.ptr) }, - capacity, - ) - } - - /// Set primitive data to the child node. - pub fn set_child(&self, data: &[T]) { - self.child(data.len()).copy(data); - self.set_len(data.len()); - } - - pub fn set_entry(&mut self, idx: usize, offset: usize, length: usize) { - self.entries.as_mut_slice::()[idx].offset = offset as u64; - self.entries.as_mut_slice::()[idx].length = length as u64; - } - - /// Reserve the capacity for its child node. - fn reserve(&self, capacity: usize) { - unsafe { duckdb_list_vector_reserve(self.entries.ptr, capacity as u64); } - } - - pub fn set_len(&self, new_len: usize) { - unsafe { duckdb_list_vector_set_size(self.entries.ptr, new_len as u64); } - } -} - -pub struct StructVector { - /// ListVector does not own the vector pointer. - ptr: duckdb_vector, -} - -impl From for StructVector { - fn from(ptr: duckdb_vector) -> Self { - Self { ptr } - } -} - -impl StructVector { - pub fn child(&self, idx: usize) -> FlatVector { - FlatVector::from(unsafe { duckdb_struct_vector_get_child(self.ptr, idx as u64) }) - } - - /// Take the child as [StructVector]. - pub fn struct_vector_child(&self, idx: usize) -> StructVector { - Self::from(unsafe { duckdb_struct_vector_get_child(self.ptr, idx as u64) }) - } - - pub fn list_vector_child(&self, idx: usize) -> ListVector { - ListVector::from(unsafe { duckdb_struct_vector_get_child(self.ptr, idx as u64) }) - } - - /// Get the logical type of this struct vector. - pub fn logical_type(&self) -> LogicalType { - LogicalType::from(unsafe { duckdb_vector_get_column_type(self.ptr) }) - } - - pub fn child_name(&self, idx: usize) -> String { - let logical_type = self.logical_type(); - unsafe { - let child_name_ptr = duckdb_struct_type_child_name(logical_type.ptr, idx as u64); - let c_str = CString::from_raw(child_name_ptr); - let name = c_str.to_str().unwrap(); - // duckdb_free(child_name_ptr.cast()); - name.to_string() - } - } - - pub fn num_children(&self) -> usize { - let logical_type = self.logical_type(); - unsafe { duckdb_struct_type_child_count(logical_type.ptr) as usize } - } -} diff --git a/integration/duckdb_lance/src/arrow.rs b/integration/duckdb_lance/src/arrow.rs deleted file mode 100644 index 0c01462746..0000000000 --- a/integration/duckdb_lance/src/arrow.rs +++ /dev/null @@ -1,370 +0,0 @@ -// Copyright 2023 Lance Developers -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! Arrow / DuckDB conversion. - -use arrow_array::{ - cast::{ - as_boolean_array, as_large_list_array, as_list_array, as_primitive_array, as_string_array, - as_struct_array, - }, - types::*, - Array, ArrowPrimitiveType, BooleanArray, FixedSizeListArray, GenericListArray, OffsetSizeTrait, - PrimitiveArray, RecordBatch, StringArray, StructArray, -}; -use arrow_schema::DataType; -use duckdb_ext::{DataChunk, FlatVector, Inserter, ListVector, StructVector, Vector}; -use duckdb_ext::{LogicalType, LogicalTypeId}; -use lance::arrow::as_fixed_size_list_array; -use num_traits::AsPrimitive; - -use crate::{Error, Result}; - -pub fn to_duckdb_type_id(data_type: &DataType) -> Result { - use LogicalTypeId::*; - - let type_id = match data_type { - DataType::Boolean => Boolean, - DataType::Int8 => Tinyint, - DataType::Int16 => Smallint, - DataType::Int32 => Integer, - DataType::Int64 => Bigint, - DataType::UInt8 => UTinyint, - DataType::UInt16 => USmallint, - DataType::UInt32 => UInteger, - DataType::UInt64 => UBigint, - DataType::Float32 => Float, - DataType::Float64 => Double, - DataType::Timestamp(_, _) => Timestamp, - DataType::Date32 => Time, - DataType::Date64 => Time, - DataType::Time32(_) => Time, - DataType::Time64(_) => Time, - DataType::Duration(_) => Interval, - DataType::Interval(_) => Interval, - DataType::Binary | DataType::LargeBinary | DataType::FixedSizeBinary(_) => Blob, - DataType::Utf8 | DataType::LargeUtf8 => Varchar, - DataType::List(_) | DataType::LargeList(_) | DataType::FixedSizeList(_, _) => List, - DataType::Struct(_) => Struct, - DataType::Union(_, _) => Union, - DataType::Dictionary(_, _) => todo!(), - DataType::Decimal128(_, _) => Decimal, - DataType::Decimal256(_, _) => Decimal, - DataType::Map(_, _) => Map, - _ => { - return Err(Error::DuckDB(format!( - "Unsupported arrow type: {data_type}" - ))); - } - }; - Ok(type_id) -} - -pub fn to_duckdb_logical_type(data_type: &DataType) -> Result { - if data_type.is_primitive() - || matches!( - data_type, - DataType::Boolean - | DataType::Utf8 - | DataType::LargeUtf8 - | DataType::Binary - | DataType::LargeBinary - ) - { - Ok(LogicalType::new(to_duckdb_type_id(data_type)?)) - } else if let DataType::Dictionary(_, value_type) = data_type { - to_duckdb_logical_type(value_type) - } else if let DataType::Struct(fields) = data_type { - let mut shape = vec![]; - for field in fields.iter() { - shape.push(( - field.name().as_str(), - to_duckdb_logical_type(field.data_type())?, - )); - } - Ok(LogicalType::struct_type(shape.as_slice())) - } else if let DataType::List(child) = data_type { - Ok(LogicalType::list_type(&to_duckdb_logical_type( - child.data_type(), - )?)) - } else if let DataType::LargeList(child) = data_type { - Ok(LogicalType::list_type(&to_duckdb_logical_type( - child.data_type(), - )?)) - } else if let DataType::FixedSizeList(child, _) = data_type { - Ok(LogicalType::list_type(&to_duckdb_logical_type( - child.data_type(), - )?)) - } else { - todo!("Unsupported data type: {data_type}, please file an issue at https://github.com/lancedb/lance"); - } -} - -pub fn record_batch_to_duckdb_data_chunk(batch: &RecordBatch, chunk: &mut DataChunk) -> Result<()> { - // Fill the row - assert_eq!(batch.num_columns(), chunk.num_columns()); - for i in 0..batch.num_columns() { - let col = batch.column(i); - match col.data_type() { - dt if dt.is_primitive() || matches!(dt, DataType::Boolean) => { - primitive_array_to_vector(col, &mut chunk.flat_vector(i)); - } - DataType::Utf8 => { - string_array_to_vector(as_string_array(col.as_ref()), &mut chunk.flat_vector(i)); - } - DataType::List(_) => { - list_array_to_vector(as_list_array(col.as_ref()), &mut chunk.list_vector(i)); - } - DataType::LargeList(_) => { - list_array_to_vector(as_large_list_array(col.as_ref()), &mut chunk.list_vector(i)); - } - DataType::FixedSizeList(_, _) => { - fixed_size_list_array_to_vector( - as_fixed_size_list_array(col.as_ref()), - &mut chunk.list_vector(i), - ); - } - DataType::Struct(_) => { - let struct_array = as_struct_array(col.as_ref()); - let mut struct_vector = chunk.struct_vector(i); - struct_array_to_vector(struct_array, &mut struct_vector); - } - _ => { - todo!("column {} is not supported yet, please file an issue at https://github.com/lancedb/lance", batch.schema().field(i)); - } - } - } - chunk.set_len(batch.num_rows()); - Ok(()) -} - -fn primitive_array_to_flat_vector( - array: &PrimitiveArray, - out_vector: &mut FlatVector, -) { - // assert!(array.len() <= out_vector.capacity()); - out_vector.copy::(array.values()); -} - -fn primitive_array_to_vector(array: &dyn Array, out: &mut dyn Vector) { - match array.data_type() { - DataType::Boolean => { - boolean_array_to_vector( - as_boolean_array(array), - out.as_mut_any().downcast_mut().unwrap(), - ); - } - DataType::UInt8 => { - primitive_array_to_flat_vector::( - as_primitive_array(array), - out.as_mut_any().downcast_mut().unwrap(), - ); - } - DataType::UInt16 => { - primitive_array_to_flat_vector::( - as_primitive_array(array), - out.as_mut_any().downcast_mut().unwrap(), - ); - } - DataType::UInt32 => { - primitive_array_to_flat_vector::( - as_primitive_array(array), - out.as_mut_any().downcast_mut().unwrap(), - ); - } - DataType::UInt64 => { - primitive_array_to_flat_vector::( - as_primitive_array(array), - out.as_mut_any().downcast_mut().unwrap(), - ); - } - DataType::Int8 => { - primitive_array_to_flat_vector::( - as_primitive_array(array), - out.as_mut_any().downcast_mut().unwrap(), - ); - } - DataType::Int16 => { - primitive_array_to_flat_vector::( - as_primitive_array(array), - out.as_mut_any().downcast_mut().unwrap(), - ); - } - DataType::Int32 => { - primitive_array_to_flat_vector::( - as_primitive_array(array), - out.as_mut_any().downcast_mut().unwrap(), - ); - } - DataType::Int64 => { - primitive_array_to_flat_vector::( - as_primitive_array(array), - out.as_mut_any().downcast_mut().unwrap(), - ); - } - DataType::Float32 => { - primitive_array_to_flat_vector::( - as_primitive_array(array), - out.as_mut_any().downcast_mut().unwrap(), - ); - } - DataType::Float64 => { - primitive_array_to_flat_vector::( - as_primitive_array(array), - out.as_mut_any().downcast_mut().unwrap(), - ); - } - _ => { - todo!() - } - } -} - -/// Convert Arrow [BooleanArray] to a duckdb vector. -fn boolean_array_to_vector(array: &BooleanArray, out: &mut FlatVector) { - assert!(array.len() <= out.capacity()); - - for i in 0..array.len() { - out.as_mut_slice()[i] = array.value(i); - } -} - -fn string_array_to_vector(array: &StringArray, out: &mut FlatVector) { - assert!(array.len() <= out.capacity()); - - // TODO: zero copy assignment - for i in 0..array.len() { - let s = array.value(i); - out.insert(i, s); - } -} - -fn list_array_to_vector>( - array: &GenericListArray, - out: &mut ListVector, -) { - let value_array = array.values(); - let mut child = out.child(value_array.len()); - match value_array.data_type() { - dt if dt.is_primitive() => { - primitive_array_to_vector(value_array.as_ref(), &mut child); - for i in 0..array.len() { - let offset = array.value_offsets()[i]; - let length = array.value_length(i); - out.set_entry(i, offset.as_(), length.as_()); - } - } - _ => { - todo!("Nested list is not supported yet."); - } - } -} - -fn fixed_size_list_array_to_vector(array: &FixedSizeListArray, out: &mut ListVector) { - let value_array = array.values(); - let mut child = out.child(value_array.len()); - match value_array.data_type() { - dt if dt.is_primitive() => { - primitive_array_to_vector(value_array.as_ref(), &mut child); - for i in 0..array.len() { - let offset = array.value_offset(i); - let length = array.value_length(); - out.set_entry(i, offset as usize, length as usize); - } - out.set_len(value_array.len()); - } - _ => { - todo!("Nested list is not supported yet."); - } - } -} - -fn struct_array_to_vector(array: &StructArray, out: &mut StructVector) { - for i in 0..array.num_columns() { - let column = array.column(i); - match column.data_type() { - dt if dt.is_primitive() || matches!(dt, DataType::Boolean) => { - primitive_array_to_vector(column, &mut out.child(i)); - } - DataType::Utf8 => { - string_array_to_vector(as_string_array(column.as_ref()), &mut out.child(i)); - } - DataType::List(_) => { - list_array_to_vector( - as_list_array(column.as_ref()), - &mut out.list_vector_child(i), - ); - } - DataType::LargeList(_) => { - list_array_to_vector( - as_large_list_array(column.as_ref()), - &mut out.list_vector_child(i), - ); - } - DataType::FixedSizeList(_, _) => { - fixed_size_list_array_to_vector( - as_fixed_size_list_array(column.as_ref()), - &mut out.list_vector_child(i), - ); - } - DataType::Struct(_) => { - let struct_array = as_struct_array(column.as_ref()); - let mut struct_vector = out.struct_vector_child(i); - struct_array_to_vector(struct_array, &mut struct_vector); - } - _ => { - todo!("Unsupported data type: {}, please file an issue at https://github.com/lancedb/lance", column.data_type()); - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - use std::sync::Arc; - - use arrow_schema::{Field, Schema}; - - // use libduckdb to link to a duckdb binary. - #[allow(unused_imports)] - use libduckdb_sys; - - #[test] - fn test_record_batch_to_data_chunk() { - let schema = Arc::new(Schema::new(vec![Field::new("b", DataType::Boolean, false)])); - - let batch = RecordBatch::try_new( - schema.clone(), - vec![Arc::new(BooleanArray::from(vec![true, false, true]))], - ) - .unwrap(); - - let logical_types = schema - .fields - .iter() - .map(|f| to_duckdb_logical_type(f.data_type()).unwrap()) - .collect::>(); - let mut chunk = DataChunk::new(&logical_types); - - record_batch_to_duckdb_data_chunk(&batch, &mut chunk).unwrap(); - assert_eq!(chunk.len(), 3); - let vector = chunk.flat_vector(0); - assert_eq!(LogicalTypeId::Boolean, vector.logical_type().id()); - assert_eq!(vector.as_slice::()[0], true); - assert_eq!(vector.as_slice::()[1], false); - assert_eq!(vector.as_slice::()[2], true); - } -} diff --git a/integration/duckdb_lance/src/error.rs b/integration/duckdb_lance/src/error.rs deleted file mode 100644 index aac3495c11..0000000000 --- a/integration/duckdb_lance/src/error.rs +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2023 Lance Developers -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#[derive(Debug)] -pub enum Error { - DuckDB(String), -} - -impl std::fmt::Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let (catalog, message) = match self { - Self::DuckDB(s) => ("DuckDB", s.as_str()), - }; - write!(f, "Lance({catalog}): {message}") - } -} - -pub type Result = std::result::Result; - -// TODO: contribute to upstream (duckdb-extension) to have a Error impl. -impl From> for Error { - fn from(value: Box) -> Self { - Self::DuckDB(value.to_string()) - } -} - -impl From for duckdb_ext::Error { - fn from(e: Error) -> Self { - Self::DuckDB(e.to_string()) - } -} - -impl From for Error { - fn from(e: duckdb_ext::Error) -> Self { - Self::DuckDB(e.to_string()) - } -} diff --git a/integration/duckdb_lance/src/extension.c b/integration/duckdb_lance/src/extension.c deleted file mode 100644 index 35e1616369..0000000000 --- a/integration/duckdb_lance/src/extension.c +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2023 Lance Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/// Callbacks for duckdb to load lance (rust) code. - -#include "extension.h" - -const char* lance_version_rust(void); -void lance_init_rust(void* db); - -DUCKDB_EXTENSION_API const char* lance_version() { - return lance_version_rust(); -} - -DUCKDB_EXTENSION_API void lance_init(void* db) { - lance_init_rust(db); -} - diff --git a/integration/duckdb_lance/src/extension.h b/integration/duckdb_lance/src/extension.h deleted file mode 100644 index f58dd56c9e..0000000000 --- a/integration/duckdb_lance/src/extension.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2023 Lance Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#define DUCKDB_EXTENSION_API - -#include "duckdb.h" diff --git a/integration/duckdb_lance/src/lib.rs b/integration/duckdb_lance/src/lib.rs deleted file mode 100644 index 8af4410ccc..0000000000 --- a/integration/duckdb_lance/src/lib.rs +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2023 Lance Developers -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::ffi::c_char; - -use duckdb_ext::ffi::{_duckdb_database, duckdb_library_version}; -use duckdb_ext::Database; -use tokio::runtime::Runtime; - -mod arrow; -pub mod error; -mod scan; - -use crate::scan::scan_table_function; -use error::{Error, Result}; - -lazy_static::lazy_static! { - static ref RUNTIME: Runtime = tokio::runtime::Runtime::new() - .expect("Creating Tokio runtime"); -} - -#[no_mangle] -pub extern "C" fn lance_version_rust() -> *const c_char { - unsafe { duckdb_library_version() } -} - -#[no_mangle] -pub unsafe extern "C" fn lance_init_rust(db: *mut _duckdb_database) { - init(db).expect("duckdb lance extension init failed"); -} - -unsafe fn init(db: *mut _duckdb_database) -> Result<()> { - let db = Database::from(db); - let table_function = scan_table_function(); - let connection = db.connect()?; - connection.register_table_function(table_function)?; - Ok(()) -} - -#[cfg(test)] -mod tests {} diff --git a/integration/duckdb_lance/src/scan.rs b/integration/duckdb_lance/src/scan.rs deleted file mode 100644 index 8fe8d407e8..0000000000 --- a/integration/duckdb_lance/src/scan.rs +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright 2023 Lance Developers -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::ffi::{c_char, c_void, CStr, CString}; - -use duckdb_ext::ffi::{ - duckdb_bind_info, duckdb_data_chunk, duckdb_free, duckdb_function_info, duckdb_init_info, - duckdb_vector_size, -}; -use duckdb_ext::table_function::{BindInfo, InitInfo, TableFunction}; -use duckdb_ext::{DataChunk, FunctionInfo, LogicalType, LogicalTypeId}; -use futures::StreamExt; -use lance::dataset::scanner::DatasetRecordBatchStream; -use lance::dataset::Dataset; - -use crate::arrow::{record_batch_to_duckdb_data_chunk, to_duckdb_logical_type}; - -#[repr(C)] -struct ScanBindData { - /// Dataset URI - uri: *mut c_char, -} - -impl ScanBindData { - fn new(uri: &str) -> Self { - Self { - uri: CString::new(uri).expect("Bind uri").into_raw(), - } - } -} - -/// Drop the ScanBindData from C. -/// -/// # Safety -unsafe extern "C" fn drop_scan_bind_data_c(v: *mut c_void) { - let actual = v.cast::(); - drop(CString::from_raw((*actual).uri.cast())); - duckdb_free(v); -} - -#[repr(C)] -struct ScanInitData { - stream: *mut DatasetRecordBatchStream, - - done: bool, -} - -impl ScanInitData { - fn new(stream: Box) -> Self { - Self { - stream: Box::into_raw(stream), - done: false, - } - } -} - -#[no_mangle] -unsafe extern "C" fn read_lance(info: duckdb_function_info, output: duckdb_data_chunk) { - let info = FunctionInfo::from(info); - let mut output = DataChunk::from(output); - - let init_data = info.init_data::(); - let batch = match crate::RUNTIME.block_on(async { (*(*init_data).stream).next().await }) { - Some(Ok(b)) => Some(b), - Some(Err(e)) => { - info.set_error(duckdb_ext::Error::DuckDB(e.to_string())); - return; - } - None => None, - }; - - if let Some(b) = batch { - if let Err(e) = record_batch_to_duckdb_data_chunk(&b, &mut output) { - info.set_error(e.into()) - }; - } else { - (*init_data).done = true; - output.set_len(0); - } -} - -#[no_mangle] -unsafe extern "C" fn read_lance_init(info: duckdb_init_info) { - let info = InitInfo::from(info); - let bind_data = info.bind_data::(); - - let uri = CStr::from_ptr((*bind_data).uri); - let dataset = - match crate::RUNTIME.block_on(async { Dataset::open(uri.to_str().unwrap()).await }) { - Ok(d) => Box::new(d), - Err(e) => { - info.set_error(duckdb_ext::Error::DuckDB(e.to_string())); - return; - } - }; - let projected_columns = info.projected_column_ids(); - let columns = projected_columns - .iter() - .map(|proj_id| dataset.schema().fields[*proj_id].name.as_str()) - .collect::>(); - - let stream = match crate::RUNTIME.block_on(async { - dataset - .scan() - .project(columns.as_slice()) - .unwrap() - .batch_size(duckdb_vector_size() as usize) - .try_into_stream() - .await - }) { - Ok(s) => Box::new(s), - Err(e) => { - info.set_error(duckdb_ext::Error::DuckDB(e.to_string())); - return; - } - }; - - let init_data = Box::new(ScanInitData::new(stream)); - info.set_init_data(Box::into_raw(init_data).cast(), Some(duckdb_free)); -} - -#[no_mangle] -unsafe extern "C" fn read_lance_bind_c(bind_info: duckdb_bind_info) { - let bind_info = BindInfo::from(bind_info); - assert!(bind_info.num_parameters() > 0); - - read_lance_bind(&bind_info); -} - -fn read_lance_bind(bind: &BindInfo) { - let uri = bind.parameter(0).to_string(); - let dataset = match crate::RUNTIME.block_on(async { Dataset::open(&uri).await }) { - Ok(d) => d, - Err(e) => { - bind.set_error(duckdb_ext::Error::DuckDB(e.to_string())); - return; - } - }; - - let schema = dataset.schema(); - for field in schema.fields.iter() { - bind.add_result_column( - &field.name, - to_duckdb_logical_type(&field.data_type()).unwrap(), - ); - } - - let bind_data = Box::new(ScanBindData::new(&uri)); - bind.set_bind_data(Box::into_raw(bind_data).cast(), Some(drop_scan_bind_data_c)); -} - -pub fn scan_table_function() -> TableFunction { - let table_function = TableFunction::new("lance_scan"); - let logical_type = LogicalType::new(LogicalTypeId::Varchar); - table_function.add_parameter(&logical_type); - - table_function.set_function(Some(read_lance)); - table_function.set_init(Some(read_lance_init)); - table_function.set_bind(Some(read_lance_bind_c)); - table_function.pushdown(true); - // TODO: add filter push down. - table_function -}