Skip to content

Commit

Permalink
Support attach relational database with schema
Browse files Browse the repository at this point in the history
  • Loading branch information
acquamarin committed Dec 16, 2024
1 parent 24e669d commit d27412d
Show file tree
Hide file tree
Showing 22 changed files with 102 additions and 23 deletions.
Binary file modified dataset/databases/duckdb_database/tinysnb.db
Binary file not shown.
2 changes: 1 addition & 1 deletion extension/delta/src/connector/delta_connector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace kuzu {
namespace delta_extension {

void DeltaConnector::connect(const std::string& /*dbPath*/, const std::string& /*catalogName*/,
main::ClientContext* context) {
const std::string& /*schemaName*/, main::ClientContext* context) {
// Creates an in-memory duckdb instance, then install httpfs and attach postgres.
instance = std::make_unique<duckdb::DuckDB>(nullptr);
connection = std::make_unique<duckdb::Connection>(*instance);
Expand Down
3 changes: 2 additions & 1 deletion extension/delta/src/function/delta_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ static std::unique_ptr<TableFuncBindData> bindFunc(main::ClientContext* context,
TableFuncBindInput* input) {
auto scanInput = ku_dynamic_cast<ExtraScanTableFuncBindInput*>(input->extraInput.get());
auto connector = std::make_shared<DeltaConnector>();
connector->connect("" /* inMemDB */, "" /* defaultCatalogName */, context);
connector->connect("" /* inMemDB */, "" /* defaultCatalogName */, "" /* defaultSchemaName */,
context);
std::string query = common::stringFormat("SELECT * FROM DELTA_SCAN('{}')",
input->getLiteralVal<std::string>(0));
auto result = connector->executeQuery(query + " LIMIT 1");
Expand Down
2 changes: 1 addition & 1 deletion extension/delta/src/include/connector/delta_connector.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace delta_extension {
class DeltaConnector : public duckdb_extension::DuckDBConnector {
public:
void connect(const std::string& dbPath, const std::string& catalogName,
main::ClientContext* context) override;
const std::string& schemaName, main::ClientContext* context) override;
};

} // namespace delta_extension
Expand Down
13 changes: 13 additions & 0 deletions extension/duckdb/src/catalog/duckdb_catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,19 @@ void DuckDBCatalog::init() {
}
}

std::string DuckDBCatalog::bindSchemaName(const binder::AttachOption& options,
const std::string& defaultName) {
if (options.options.contains(DuckDBStorageExtension::SCHEMA_OPTION)) {
auto val = options.options.at(DuckDBStorageExtension::SCHEMA_OPTION);
if (val.getDataType().getLogicalTypeID() != common::LogicalTypeID::STRING) {
throw common::RuntimeException{common::stringFormat("Invalid option value for {}",
DuckDBStorageExtension::SCHEMA_OPTION)};
}
return val.getValue<std::string>();
}
return defaultName;
}

static std::string getQuery(const binder::BoundCreateTableInfo& info) {
auto extraInfo = info.extraInfo->constPtrCast<BoundExtraCreateDuckDBTableInfo>();
return "SELECT {} " + common::stringFormat("FROM \"{}\".{}.{}", extraInfo->catalogName,
Expand Down
2 changes: 1 addition & 1 deletion extension/duckdb/src/connector/local_duckdb_connector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace kuzu {
namespace duckdb_extension {

void LocalDuckDBConnector::connect(const std::string& dbPath, const std::string& /*catalogName*/,
main::ClientContext* context) {
const std::string& /*schemaName*/, main::ClientContext* context) {
if (!context->getVFSUnsafe()->fileOrPathExists(dbPath, context)) {
throw common::RuntimeException{
common::stringFormat("Given duckdb database path {} does not exist.", dbPath)};
Expand Down
4 changes: 2 additions & 2 deletions extension/duckdb/src/connector/remote_duckdb_connector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace kuzu {
namespace duckdb_extension {

void HTTPDuckDBConnector::connect(const std::string& dbPath, const std::string& catalogName,
main::ClientContext* /*context*/) {
const std::string& /*schemaName*/, main::ClientContext* /*context*/) {
// Creates an in-memory duckdb instance, then install httpfs and attach remote duckdb.
instance = std::make_unique<duckdb::DuckDB>(nullptr);
connection = std::make_unique<duckdb::Connection>(*instance);
Expand All @@ -18,7 +18,7 @@ void HTTPDuckDBConnector::connect(const std::string& dbPath, const std::string&
}

void S3DuckDBConnector::connect(const std::string& dbPath, const std::string& catalogName,
main::ClientContext* context) {
const std::string& /*schemaName*/, main::ClientContext* context) {
// Creates an in-memory duckdb instance, then install httpfs and attach remote duckdb.
instance = std::make_unique<duckdb::DuckDB>(nullptr);
connection = std::make_unique<duckdb::Connection>(*instance);
Expand Down
3 changes: 3 additions & 0 deletions extension/duckdb/src/include/catalog/duckdb_catalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ class DuckDBCatalog : public extension::CatalogExtension {

void init() override;

static std::string bindSchemaName(const binder::AttachOption& options,
const std::string& defaultName);

protected:
bool bindPropertyDefinitions(const std::string& tableName,
std::vector<binder::PropertyDefinition>& propertyDefinitions);
Expand Down
2 changes: 1 addition & 1 deletion extension/duckdb/src/include/connector/duckdb_connector.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class DuckDBConnector {
virtual ~DuckDBConnector() = default;

virtual void connect(const std::string& dbPath, const std::string& catalogName,
main::ClientContext* context) = 0;
const std::string& schemaName, main::ClientContext* context) = 0;

std::unique_ptr<duckdb::MaterializedQueryResult> executeQuery(std::string query) const;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace duckdb_extension {
class LocalDuckDBConnector : public DuckDBConnector {
public:
void connect(const std::string& dbPath, const std::string& catalogName,
main::ClientContext* context) override;
const std::string& schemaName, main::ClientContext* context) override;
};

} // namespace duckdb_extension
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ namespace duckdb_extension {
class HTTPDuckDBConnector : public DuckDBConnector {
public:
void connect(const std::string& dbPath, const std::string& catalogName,
main::ClientContext* context) override;
const std::string& schemaName, main::ClientContext* context) override;
};

class S3DuckDBConnector : public DuckDBConnector {
public:
void connect(const std::string& dbPath, const std::string& catalogName,
main::ClientContext* context) override;
const std::string& schemaName, main::ClientContext* context) override;
};

} // namespace duckdb_extension
Expand Down
2 changes: 2 additions & 0 deletions extension/duckdb/src/include/storage/duckdb_storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ class DuckDBStorageExtension final : public storage::StorageExtension {

static constexpr const char* SKIP_UNSUPPORTED_TABLE_KEY = "SKIP_UNSUPPORTED_TABLE";

static constexpr const char* SCHEMA_OPTION = "SCHEMA";

static constexpr bool SKIP_UNSUPPORTED_TABLE_DEFAULT_VAL = false;

explicit DuckDBStorageExtension(main::Database* database);
Expand Down
9 changes: 7 additions & 2 deletions extension/duckdb/src/storage/duckdb_storage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

#include <filesystem>

#include "binder/bound_attach_info.h"
#include "catalog/duckdb_catalog.h"
#include "common/exception/runtime.h"
#include "common/file_system/virtual_file_system.h"
#include "common/string_utils.h"
#include "connector/connector_factory.h"
Expand All @@ -24,10 +26,13 @@ std::unique_ptr<main::AttachedDatabase> attachDuckDB(std::string dbName, std::st
if (dbName == "") {
dbName = catalogName;
}
auto schemaName =
DuckDBCatalog::bindSchemaName(attachOption, DuckDBStorageExtension::DEFAULT_SCHEMA_NAME);
auto connector = DuckDBConnectorFactory::getDuckDBConnector(dbPath);
connector->connect(dbPath, catalogName, clientContext);
connector->connect(dbPath, catalogName, schemaName, clientContext);

auto duckdbCatalog = std::make_unique<DuckDBCatalog>(std::move(dbPath), std::move(catalogName),
DuckDBStorageExtension::DEFAULT_SCHEMA_NAME, clientContext, *connector, attachOption);
schemaName, clientContext, *connector, attachOption);
duckdbCatalog->init();
return std::make_unique<AttachedDuckDBDatabase>(dbName, DuckDBStorageExtension::DB_TYPE,
std::move(duckdbCatalog), std::move(connector));
Expand Down
9 changes: 9 additions & 0 deletions extension/duckdb/test/test_files/duckdb.test
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,15 @@ Catalog exception: personxx does not exist in catalog.
-STATEMENT CALL TABLE_INFO('tinysnbxx.person') RETURN *;
---- error
Runtime exception: Database: tinysnbxx doesn't exist.
-LOG AttachWithNonDefaultSchema
-STATEMENT ATTACH '${KUZU_ROOT_DIRECTORY}/dataset/databases/duckdb_database/tinysnb.db' as kuzu (dbtype duckdb, schema = 'kuzu');
---- 1
Attached database successfully.
-STATEMENT LOAD FROM kuzu.employee RETURN *;
---- 3
0|alice|52
2|carol|23
10|dan|72

-CASE InvalidDuckDBDatabase
-STATEMENT LOAD FROM tinysnb1.person RETURN *;
Expand Down
6 changes: 3 additions & 3 deletions extension/postgres/src/connector/postgres_connector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ namespace kuzu {
namespace postgres_extension {

void PostgresConnector::connect(const std::string& dbPath, const std::string& catalogName,
main::ClientContext* /*context*/) {
const std::string& schemaName, main::ClientContext* /*context*/) {
// Creates an in-memory duckdb instance, then install httpfs and attach postgres.
instance = std::make_unique<duckdb::DuckDB>(nullptr);
connection = std::make_unique<duckdb::Connection>(*instance);
executeQuery("install postgres;");
executeQuery("load postgres;");
executeQuery(
common::stringFormat("attach '{}' as {} (TYPE postgres, read_only);", dbPath, catalogName));
executeQuery(common::stringFormat("attach '{}' as {} (TYPE postgres, SCHEMA {}, read_only);",
dbPath, catalogName, schemaName));
}

} // namespace postgres_extension
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace postgres_extension {
class PostgresConnector : public duckdb_extension::DuckDBConnector {
public:
void connect(const std::string& dbPath, const std::string& catalogName,
main::ClientContext* context) override;
const std::string& schemaName, main::ClientContext* context) override;
};

} // namespace postgres_extension
Expand Down
7 changes: 5 additions & 2 deletions extension/postgres/src/storage/postgres_storage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "extension/extension.h"
#include "function/clear_cache.h"
#include "storage/attached_duckdb_database.h"
#include "storage/duckdb_storage.h"

namespace kuzu {
namespace postgres_extension {
Expand All @@ -28,10 +29,12 @@ std::unique_ptr<main::AttachedDatabase> attachPostgres(std::string dbName, std::
if (dbName == "") {
dbName = catalogName;
}
auto schemaName = duckdb_extension::DuckDBCatalog::bindSchemaName(attachOption,
PostgresStorageExtension::DEFAULT_SCHEMA_NAME);
auto connector = std::make_unique<PostgresConnector>();
connector->connect(dbPath, catalogName, clientContext);
connector->connect(dbPath, catalogName, schemaName, clientContext);
auto catalog = std::make_unique<duckdb_extension::DuckDBCatalog>(dbPath, catalogName,
PostgresStorageExtension::DEFAULT_SCHEMA_NAME, clientContext, *connector, attachOption);
schemaName, clientContext, *connector, attachOption);
catalog->init();
return std::make_unique<duckdb_extension::AttachedDuckDBDatabase>(dbName,
PostgresStorageExtension::DB_TYPE, std::move(catalog), std::move(connector));
Expand Down
37 changes: 35 additions & 2 deletions extension/postgres/test/test_files/create_test_db.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
-- PostgreSQL database dump
--

-- Dumped from database version 14.10 (Homebrew)
-- Dumped by pg_dump version 14.10 (Homebrew)
-- Dumped from database version 14.13 (Homebrew)
-- Dumped by pg_dump version 14.13 (Homebrew)

SET statement_timeout = 0;
SET lock_timeout = 0;
Expand All @@ -16,6 +16,15 @@ SET xmloption = content;
SET client_min_messages = warning;
SET row_security = off;

--
-- Name: kuzu; Type: SCHEMA; Schema: -; Owner: ci
--

CREATE SCHEMA kuzu;


ALTER SCHEMA kuzu OWNER TO ci;

--
-- Name: audience_type; Type: TYPE; Schema: public; Owner: ci
--
Expand Down Expand Up @@ -93,6 +102,19 @@ SET default_tablespace = '';

SET default_table_access_method = heap;

--
-- Name: user; Type: TABLE; Schema: kuzu; Owner: ci
--

CREATE TABLE kuzu."user" (
id integer,
org character varying,
rate integer
);


ALTER TABLE kuzu."user" OWNER TO ci;

--
-- Name: movies; Type: TABLE; Schema: public; Owner: ci
--
Expand Down Expand Up @@ -165,6 +187,17 @@ CREATE TABLE public.persontest (

ALTER TABLE public.persontest OWNER TO ci;

--
-- Data for Name: user; Type: TABLE DATA; Schema: kuzu; Owner: ci
--

COPY kuzu."user" (id, org, rate) FROM stdin;
5 apple 4
7 ms 5
9 blackberry 7
\.


--
-- Data for Name: movies; Type: TABLE DATA; Schema: public; Owner: ci
--
Expand Down
9 changes: 9 additions & 0 deletions extension/postgres/test/test_files/postgres.test
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,12 @@ The 😂😃🧘🏻‍♂️🌍🌦️🍞🚗 movie
11|usednames|STRING[]|NULL
12|height|DOUBLE|NULL
13|u|UUID|NULL
-LOG AttachPGWithNonDefaultSchema
-STATEMENT ATTACH 'dbname=pgscan user=ci host=localhost' as ku (dbtype POSTGRES, SCHEMA = 'kuzu');
---- 1
Attached database successfully.
-STATEMENT LOAD FROM ku.user RETURN *;
---- 3
5|apple|4
7|ms|5
9|blackberry|7
2 changes: 1 addition & 1 deletion extension/sqlite/src/connector/sqlite_connector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ namespace kuzu {
namespace sqlite_extension {

void SqliteConnector::connect(const std::string& dbPath, const std::string& catalogName,
main::ClientContext* /*context*/) {
const std::string& /*schemaName*/, main::ClientContext* /*context*/) {
// Creates an in-memory duckdb instance, then install httpfs and attach SQLITE.
instance = std::make_unique<duckdb::DuckDB>(nullptr);
connection = std::make_unique<duckdb::Connection>(*instance);
Expand Down
2 changes: 1 addition & 1 deletion extension/sqlite/src/include/connector/sqlite_connector.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace sqlite_extension {
class SqliteConnector : public duckdb_extension::DuckDBConnector {
public:
void connect(const std::string& dbPath, const std::string& catalogName,
main::ClientContext* context) override;
const std::string& schemaName, main::ClientContext* context) override;
};

} // namespace sqlite_extension
Expand Down
3 changes: 2 additions & 1 deletion extension/sqlite/src/storage/sqlite_storage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ std::unique_ptr<main::AttachedDatabase> attachSqlite(std::string dbName, std::st
dbName = catalogName;
}
auto connector = std::make_unique<SqliteConnector>();
connector->connect(dbPath, catalogName, clientContext);
connector->connect(dbPath, catalogName, SqliteStorageExtension::DEFAULT_SCHEMA_NAME,
clientContext);
auto catalog = std::make_unique<duckdb_extension::DuckDBCatalog>(dbPath, catalogName,
SqliteStorageExtension::DEFAULT_SCHEMA_NAME, clientContext, *connector, attachOption);
catalog->init();
Expand Down

0 comments on commit d27412d

Please sign in to comment.