Skip to content

Commit

Permalink
string handling
Browse files Browse the repository at this point in the history
  • Loading branch information
jraymakers committed Jul 21, 2024
1 parent d90d88d commit 4a53e29
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 16 deletions.
20 changes: 14 additions & 6 deletions alt/bindings/src/duckdb.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -602,10 +602,10 @@ export function data_chunk_set_size(chunk: DataChunk, size: number): void;
export function vector_get_column_type(vector: Vector): LogicalType;

// void *duckdb_vector_get_data(duckdb_vector vector)
export function vector_get_data(vector: Vector, byteCount: number): Buffer;
export function vector_get_data(vector: Vector, byte_count: number): Uint8Array;

// uint64_t *duckdb_vector_get_validity(duckdb_vector vector)
export function vector_get_validity(vector: Vector, byteCount: number): Buffer;
/** `byte_count` is the number of bytes of the vector's validity mask to expose in the returned array. */
export function vector_get_validity(vector: Vector, byte_count: number): Uint8Array;

// void duckdb_vector_ensure_validity_writable(duckdb_vector vector)
export function vector_ensure_validity_writable(vector: Vector): void;
Expand Down Expand Up @@ -634,16 +634,16 @@ export function struct_vector_get_child(vector: Vector, index: number): Vector;
export function array_vector_get_child(vector: Vector): Vector;

// bool duckdb_validity_row_is_valid(uint64_t *validity, idx_t row)
export function validity_row_is_valid(validity: Buffer, row_index: number): boolean;
export function validity_row_is_valid(validity: Uint8Array, row_index: number): boolean;

// void duckdb_validity_set_row_validity(uint64_t *validity, idx_t row, bool valid)
export function validity_set_row_validity(validity: Buffer, row_index: number, valid: boolean): void;
export function validity_set_row_validity(validity: Uint8Array, row_index: number, valid: boolean): void;

// void duckdb_validity_set_row_invalid(uint64_t *validity, idx_t row)
export function validity_set_row_invalid(validity: Buffer, row_index: number): void;
export function validity_set_row_invalid(validity: Uint8Array, row_index: number): void;

// void duckdb_validity_set_row_valid(uint64_t *validity, idx_t row)
export function validity_set_row_valid(validity: Buffer, row_index: number): void;
export function validity_set_row_valid(validity: Uint8Array, row_index: number): void;

// duckdb_state duckdb_appender_create(duckdb_connection connection, const char *schema, const char *table, duckdb_appender *out_appender)
export function appender_create(connection: Connection, schema: string, table: string): Appender;
Expand Down Expand Up @@ -739,3 +739,11 @@ export function append_data_chunk(appender: Appender, chunk: DataChunk): State;

// duckdb_data_chunk duckdb_fetch_chunk(duckdb_result result)
export function fetch_chunk(result: Result): Promise<DataChunk>;

// ADDED
/**
 * Read a pointer from `array_buffer` at `pointer_offset`, then read and return `byte_count` bytes from that pointer.
 *
 * Used to read from `duckdb_string_t`s with non-inlined data that are embedded in VARCHAR, BLOB, and BIT vectors.
 *
 * @param array_buffer Buffer containing the native pointer (for example, the buffer behind a vector's data).
 * @param pointer_offset Byte offset into `array_buffer` at which the native pointer is stored.
 * @param byte_count Number of bytes to return, starting at the address the pointer holds.
 * @returns The `byte_count` bytes found at the pointed-to address.
 *
 * NOTE(review): the caller is responsible for `pointer_offset` locating a valid pointer to at least
 * `byte_count` readable bytes. Whether the result is a copy or a view over native memory is not
 * visible from this declaration; confirm before holding on to it beyond the life of the source chunk.
 */
export function get_data_from_pointer(array_buffer: ArrayBuffer, pointer_offset: number, byte_count: number): Uint8Array;
29 changes: 21 additions & 8 deletions alt/bindings/src/duckdb_node_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,8 @@ class DuckDBNodeAddon : public Napi::Addon<DuckDBNodeAddon> {
// TODO: duckdb_validity_set_row_valid

InstanceMethod("fetch_chunk", &DuckDBNodeAddon::fetch_chunk),

InstanceMethod("get_data_from_pointer", &DuckDBNodeAddon::get_data_from_pointer),
});
}

Expand Down Expand Up @@ -941,23 +943,23 @@ class DuckDBNodeAddon : public Napi::Addon<DuckDBNodeAddon> {
// TODO

// void *duckdb_vector_get_data(duckdb_vector vector)
// function vector_get_data(vector: Vector, length: number): Buffer
// function vector_get_data(vector: Vector, byte_count: number): Uint8Array
Napi::Value vector_get_data(const Napi::CallbackInfo& info) {
// Exposes the memory returned by duckdb_vector_get_data to JS as a byte buffer.
// info[0]: the vector (resolved through GetVectorFromExternal).
// info[1]: how many bytes to expose, read as an unsigned 32-bit integer.
auto env = info.Env();
auto vector = GetVectorFromExternal(env, info[0]);
auto byteCount = info[1].As<Napi::Number>().Uint32Value();
auto byte_count = info[1].As<Napi::Number>().Uint32Value();
void *data = duckdb_vector_get_data(vector);
// The byte count is taken on trust from the caller; nothing here checks it against the vector's real size.
// NOTE(review): NewOrCopy may wrap `data` without copying, in which case the returned bytes are
// presumably only valid while the owning vector/chunk is alive. Confirm the lifetime contract.
return Napi::Buffer<uint8_t>::NewOrCopy(env, reinterpret_cast<uint8_t*>(data), byteCount);
return Napi::Buffer<uint8_t>::NewOrCopy(env, reinterpret_cast<uint8_t*>(data), byte_count);
}

// uint64_t *duckdb_vector_get_validity(duckdb_vector vector)
// function vector_get_validity(vector: Vector, byteCount: number): Buffer
// function vector_get_validity(vector: Vector, byte_count: number): Uint8Array
Napi::Value vector_get_validity(const Napi::CallbackInfo& info) {
// Exposes the validity mask returned by duckdb_vector_get_validity to JS as a byte buffer.
// info[0]: the vector (resolved through GetVectorFromExternal).
// info[1]: how many bytes of the mask to expose, read as an unsigned 32-bit integer.
auto env = info.Env();
auto vector = GetVectorFromExternal(env, info[0]);
auto byteCount = info[1].As<Napi::Number>().Uint32Value();
auto byte_count = info[1].As<Napi::Number>().Uint32Value();
uint64_t *data = duckdb_vector_get_validity(vector);
// The uint64_t mask words are reinterpreted as raw bytes; the length argument is in bytes, not words.
// NOTE(review): `data` is passed on unchecked. Confirm whether duckdb_vector_get_validity can
// return NULL (e.g. when every row is valid) and how callers are expected to handle that.
return Napi::Buffer<uint8_t>::NewOrCopy(env, reinterpret_cast<uint8_t*>(data), byteCount);
return Napi::Buffer<uint8_t>::NewOrCopy(env, reinterpret_cast<uint8_t*>(data), byte_count);
}

// void duckdb_vector_ensure_validity_writable(duckdb_vector vector)
Expand Down Expand Up @@ -1010,10 +1012,10 @@ class DuckDBNodeAddon : public Napi::Addon<DuckDBNodeAddon> {
}

// bool duckdb_validity_row_is_valid(uint64_t *validity, idx_t row)
// function validity_row_is_valid(validity: Buffer, row_index: number): boolean
// function validity_row_is_valid(validity: Uint8Array, row_index: number): boolean
Napi::Value validity_row_is_valid(const Napi::CallbackInfo& info) {
auto env = info.Env();
auto validity = reinterpret_cast<uint64_t*>(info[0].As<Napi::Buffer<uint8_t>>().Data());
auto validity = reinterpret_cast<uint64_t*>(info[0].As<Napi::Uint8Array>().Data());
auto row_index = info[1].As<Napi::Number>().Uint32Value();
auto valid = duckdb_validity_row_is_valid(validity, row_index);
return Napi::Boolean::New(env, valid);
Expand Down Expand Up @@ -1070,6 +1072,17 @@ class DuckDBNodeAddon : public Napi::Addon<DuckDBNodeAddon> {
return worker->Promise();
}

// ADDED
// function get_data_from_pointer(array_buffer: ArrayBuffer, pointer_offset: number, byte_count: number): Uint8Array
//
// Reads a native pointer stored inside `array_buffer` at byte offset `pointer_offset`, then returns
// `byte_count` bytes starting at the address that pointer holds (for example, the out-of-line
// payload of a duckdb_string_t).
//
// Raises a JS TypeError if the first argument is not an ArrayBuffer, and a JS RangeError if the
// pointer slot does not lie entirely inside it. The pointed-to memory itself cannot be validated
// here: the caller must ensure it holds at least `byte_count` readable bytes.
Napi::Value get_data_from_pointer(const Napi::CallbackInfo& info) {
  auto env = info.Env();
  if (!info[0].IsArrayBuffer()) {
    Napi::TypeError::New(env, "get_data_from_pointer: array_buffer must be an ArrayBuffer").ThrowAsJavaScriptException();
    return env.Undefined();
  }
  auto array_buffer = info[0].As<Napi::ArrayBuffer>();
  auto pointer_offset = info[1].As<Napi::Number>().Uint32Value();
  auto byte_count = info[2].As<Napi::Number>().Uint32Value();
  // The whole pointer slot [pointer_offset, pointer_offset + sizeof(pointer)) must fit in the buffer;
  // otherwise the dereference below would read memory outside of it. Written as a subtraction so the
  // comparison cannot overflow.
  const size_t buffer_length = array_buffer.ByteLength();
  if (buffer_length < sizeof(uint8_t*) || pointer_offset > buffer_length - sizeof(uint8_t*)) {
    Napi::RangeError::New(env, "get_data_from_pointer: pointer_offset is out of range for array_buffer").ThrowAsJavaScriptException();
    return env.Undefined();
  }
  auto data = reinterpret_cast<uint8_t*>(array_buffer.Data());
  auto pointer = *reinterpret_cast<uint8_t**>(data + pointer_offset);
  return Napi::Buffer<uint8_t>::NewOrCopy(env, pointer, byte_count);
}

};

Expand Down
48 changes: 46 additions & 2 deletions alt/bindings/test/query.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,34 @@ function isValid(validity: BigUint64Array, bit: number): boolean {
return (validity[Math.floor(bit / 64)] & (1n << BigInt(bit % 64))) !== 0n;
}

/**
 * Asserts that row `bit` has validity `expected` according to two independent checks:
 * the native `duckdb.validity_row_is_valid` applied to `validity_bytes`, and the local
 * pure-JS `isValid` applied to `validity`.
 *
 * Callers are expected to pass two views of the same validity mask (bytes and 64-bit words),
 * so that the binding and the JS bit arithmetic are cross-checked against each other.
 */
function expectValidity(validity_bytes: Buffer, validity: BigUint64Array, bit: number, expected: boolean) {
function expectValidity(validity_bytes: Uint8Array, validity: BigUint64Array, bit: number, expected: boolean) {
expect(duckdb.validity_row_is_valid(validity_bytes, bit)).toBe(expected);
expect(isValid(validity, bit)).toBe(expected);
}

/**
 * Returns the raw bytes of the `duckdb_string_t` that starts `string_byte_offset` bytes into `dv`.
 *
 * Layout relied on here: a little-endian uint32 byte length comes first. Strings of at most
 * 12 bytes are stored inline immediately after the length (offset 4) and are returned as a view
 * over `dv`'s buffer. Longer strings are referenced by a pointer at offset 8, which is resolved
 * through `duckdb.get_data_from_pointer`.
 */
function getStringBytes(dv: DataView, string_byte_offset: number): Uint8Array {
  const byte_length = dv.getUint32(string_byte_offset, true);
  // Offsets handed to Uint8Array / the binding are relative to the underlying buffer, not the view.
  const absolute_offset = dv.byteOffset + string_byte_offset;
  const is_inlined = byte_length <= 12;
  return is_inlined
    ? new Uint8Array(dv.buffer, absolute_offset + 4, byte_length)
    : duckdb.get_data_from_pointer(dv.buffer, absolute_offset + 8, byte_length);
}

// Shared decoder for turning string bytes into JS strings; TextDecoder defaults to UTF-8.
const decoder = new TextDecoder();

/**
 * Decodes, as UTF-8 text, the `duckdb_string_t` that starts `string_byte_offset` bytes into `dv`.
 * The string's bytes may be stored inline or behind a pointer; `getStringBytes` handles both.
 */
function getVarchar(dv: DataView, string_byte_offset: number): string {
  const utf8_bytes = getStringBytes(dv, string_byte_offset);
  return decoder.decode(utf8_bytes);
}

suite('query', () => {
test('basic select', async () => {
const db = await duckdb.open();
Expand Down Expand Up @@ -74,8 +97,14 @@ suite('query', () => {
expect(duckdb.column_count(res)).toBe(53);
expect(duckdb.column_name(res, 0)).toBe('bool');
expect(duckdb.column_type(res, 0)).toBe(duckdb.Type.BOOLEAN);
expect(duckdb.column_name(res, 27)).toBe('varchar');
expect(duckdb.column_type(res, 27)).toBe(duckdb.Type.VARCHAR);
expect(duckdb.column_name(res, 33)).toBe('int_array');
expect(duckdb.column_type(res, 33)).toBe(duckdb.Type.LIST);
expect(duckdb.column_name(res, 40)).toBe('struct');
expect(duckdb.column_type(res, 40)).toBe(duckdb.Type.STRUCT);
expect(duckdb.column_name(res, 45)).toBe('fixed_int_array');
expect(duckdb.column_type(res, 45)).toBe(duckdb.Type.ARRAY);
expect(duckdb.column_name(res, 52)).toBe('list_of_fixed_int_array');
expect(duckdb.column_type(res, 52)).toBe(duckdb.Type.LIST);
const chunk = await duckdb.fetch_chunk(res);
Expand All @@ -100,6 +129,21 @@ suite('query', () => {

expectValidity(bool_validity_bytes, bool_validity, 2, false);

// varchar
const varchar_vector = duckdb.data_chunk_get_vector(chunk, 27);
const varchar_validity_bytes = duckdb.vector_get_validity(varchar_vector, 8);
const varchar_validity = new BigUint64Array(varchar_validity_bytes.buffer, 0, 1);
const varchar_data = duckdb.vector_get_data(varchar_vector, 3*16);
const varchar_dv = new DataView(varchar_data.buffer);

expectValidity(varchar_validity_bytes, varchar_validity, 0, true);
expect(getVarchar(varchar_dv, 0*16)).toBe('🦆🦆🦆🦆🦆🦆');

expectValidity(varchar_validity_bytes, varchar_validity, 1, true);
expect(getVarchar(varchar_dv, 1*16)).toBe('goo\0se');

expectValidity(varchar_validity_bytes, varchar_validity, 2, false);

// int_array
const int_array_vector = duckdb.data_chunk_get_vector(chunk, 33);
const int_array_validity_bytes = duckdb.vector_get_validity(int_array_vector, 8);
Expand Down Expand Up @@ -168,7 +212,7 @@ suite('query', () => {
expect(struct_child0_dv.getInt32(1*4, true)).toBe(42);
expectValidity(struct_child1_validity_bytes, struct_child1_validity, 1, true);
expect(struct_child1_dv.getInt32(1*16, true)).toBe(24);
// TODO: validate string contents
expect(getVarchar(struct_child1_dv, 1*16)).toBe('🦆🦆🦆🦆🦆🦆');

expectValidity(struct_validity_bytes, struct_validity, 2, false);
expectValidity(struct_child0_validity_bytes, struct_child0_validity, 2, false);
Expand Down

0 comments on commit 4a53e29

Please sign in to comment.