Skip to content

Commit

Permalink
Merge pull request #28 from duckdb/jray/api-varint
Browse files Browse the repository at this point in the history
api: implement varint, fix timestamp_tz vector type
  • Loading branch information
jraymakers authored Oct 20, 2024
2 parents 3e8588b + 568d95c commit 0219c0e
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 29 deletions.
98 changes: 93 additions & 5 deletions api/src/DuckDBVector.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os from 'os';
import duckdb from '@duckdb/node-bindings';
import os from 'os';
import { DuckDBLogicalType } from './DuckDBLogicalType';
import {
DuckDBArrayType,
Expand All @@ -25,6 +25,7 @@ import {
DuckDBTimestampMillisecondsType,
DuckDBTimestampNanosecondsType,
DuckDBTimestampSecondsType,
DuckDBTimestampTZType,
DuckDBTimestampType,
DuckDBTinyIntType,
DuckDBType,
Expand All @@ -36,6 +37,7 @@ import {
DuckDBUUIDType,
DuckDBUnionType,
DuckDBVarCharType,
DuckDBVarIntType,
} from './DuckDBType';
import { DuckDBTypeId } from './DuckDBTypeId';

Expand Down Expand Up @@ -122,6 +124,26 @@ function getBuffer(dataView: DataView, offset: number): Buffer | null {
return Buffer.from(stringBytes);
}

/**
 * Decodes a VARINT blob into a bigint.
 *
 * Layout as consumed by this function:
 * - The first three bytes are a header. Only the top bit of the first byte is
 *   inspected: set means the value is non-negative, clear means negative. The
 *   remaining header bits are not read here.
 * - Every byte after the header is magnitude data, most significant byte first.
 *   For negative values the magnitude bytes are stored bit-inverted, so they
 *   are XOR-ed back before being accumulated.
 *
 * @param bytes - The complete VARINT blob, header included.
 * @returns The decoded integer (a header-only blob decodes to `0n`).
 * @throws RangeError if `bytes` is shorter than the three-byte header.
 */
function getVarIntFromBytes(bytes: Uint8Array): bigint {
  const headerSize = 3;
  // Fail with a descriptive message instead of letting the DataView constructor
  // reject a negative length further down.
  if (bytes.byteLength < headerSize) {
    throw new RangeError(
      `Invalid VARINT: expected at least ${headerSize} header bytes, got ${bytes.byteLength}`,
    );
  }
  const positive = (bytes[0] & 0x80) !== 0;
  // Masks that undo the bit inversion applied to negative magnitudes.
  const uint64Mask = positive ? 0n : 0xffffffffffffffffn;
  const uint8Mask = positive ? 0 : 0xff;
  // View over the magnitude bytes only; honors the byteOffset of `bytes` so
  // subarrays of a larger buffer decode correctly.
  const dv = new DataView(bytes.buffer, bytes.byteOffset + headerSize, bytes.byteLength - headerSize);
  const lastUint64Offset = dv.byteLength - 8;
  let offset = 0;
  let result = 0n;
  // Consume the magnitude eight bytes at a time (DataView reads big-endian by default)...
  while (offset <= lastUint64Offset) {
    result = (result << 64n) | (dv.getBigUint64(offset) ^ uint64Mask);
    offset += 8;
  }
  // ...then the remaining tail one byte at a time.
  while (offset < dv.byteLength) {
    result = (result << 8n) | BigInt(dv.getUint8(offset) ^ uint8Mask);
    offset += 1;
  }
  return positive ? result : -result;
}

/** Reads the byte at `offset` through getUInt8 and reports whether it is non-zero. */
function getBoolean1(dataView: DataView, offset: number): boolean {
  const rawByte = getUInt8(dataView, offset);
  return rawByte !== 0;
}
Expand Down Expand Up @@ -360,13 +382,13 @@ export abstract class DuckDBVector<T> {
case DuckDBTypeId.TIME_TZ:
return DuckDBTimeTZVector.fromRawVector(vector, itemCount);
case DuckDBTypeId.TIMESTAMP_TZ:
return DuckDBTimestampVector.fromRawVector(vector, itemCount);
return DuckDBTimestampTZVector.fromRawVector(vector, itemCount);
case DuckDBTypeId.ANY:
throw new Error(`Vector not implemented for ANY type`);
throw new Error(`Invalid vector type: ANY`);
case DuckDBTypeId.VARINT:
return DuckDBBlobVector.fromRawVector(vector, itemCount); // TODO: VARINT
return DuckDBVarIntVector.fromRawVector(vector, itemCount);
case DuckDBTypeId.SQLNULL:
throw new Error(`Vector not implemented for SQLNULL type`);
throw new Error(`Invalid vector type: SQLNULL`);
default:
throw new Error(`Invalid type id: ${vectorType.typeId}`);
}
Expand Down Expand Up @@ -1774,3 +1796,69 @@ export class DuckDBTimeTZVector extends DuckDBVector<DuckDBTimeTZValue> {
return new DuckDBTimeTZVector(this.items.slice(offset, offset + length), this.validity.slice(offset));
}
}

/**
 * Vector of TIMESTAMP_TZ values, exposed as raw 64-bit integers.
 *
 * Items are read straight from a BigInt64Array laid over the vector's data;
 * the validity mask decides which entries are reported as null.
 */
export class DuckDBTimestampTZVector extends DuckDBVector<bigint> {
  private readonly items: BigInt64Array;
  private readonly validity: DuckDBValidity;

  constructor(items: BigInt64Array, validity: DuckDBValidity) {
    super();
    this.items = items;
    this.validity = validity;
  }

  /**
   * Builds a vector view over the first `itemCount` 64-bit entries of `vector`.
   */
  static fromRawVector(vector: duckdb.Vector, itemCount: number): DuckDBTimestampTZVector {
    const data = vectorData(vector, itemCount * BigInt64Array.BYTES_PER_ELEMENT);
    const items = new BigInt64Array(data.buffer, data.byteOffset, itemCount);
    const validity = DuckDBValidity.fromVector(vector, itemCount);
    return new DuckDBTimestampTZVector(items, validity);
  }

  /** Logical type of this vector: the TIMESTAMP_TZ type singleton. */
  public override get type(): DuckDBTimestampTZType {
    return DuckDBTimestampTZType.instance;
  }

  public override get itemCount(): number {
    return this.items.length;
  }

  /** Returns the item at `itemIndex`, or null when the validity mask marks it invalid. */
  public override getItem(itemIndex: number): bigint | null { // microseconds
    return this.validity.itemValid(itemIndex) ? this.items[itemIndex] : null;
  }

  /** Returns a new vector holding a copy of `length` items starting at `offset`. */
  public override slice(offset: number, length: number): DuckDBTimestampTZVector {
    return new DuckDBTimestampTZVector(this.items.slice(offset, offset + length), this.validity.slice(offset));
  }
}

/**
 * Vector of VARINT values, exposed as bigints.
 *
 * Each item occupies a 16-byte slot in the underlying data view. A slot is
 * resolved to the value's bytes with getStringBytes and then decoded with
 * getVarIntFromBytes.
 */
export class DuckDBVarIntVector extends DuckDBVector<bigint> {
  private readonly dataView: DataView;
  private readonly validity: DuckDBValidity;
  private readonly _itemCount: number;

  constructor(dataView: DataView, validity: DuckDBValidity, itemCount: number) {
    super();
    this.dataView = dataView;
    this.validity = validity;
    this._itemCount = itemCount;
  }

  /** Wraps the first `itemCount` 16-byte slots of `vector` together with its validity mask. */
  static fromRawVector(vector: duckdb.Vector, itemCount: number): DuckDBVarIntVector {
    const raw = vectorData(vector, itemCount * 16);
    return new DuckDBVarIntVector(
      new DataView(raw.buffer, raw.byteOffset, raw.byteLength),
      DuckDBValidity.fromVector(vector, itemCount),
      itemCount,
    );
  }

  public override get type(): DuckDBVarIntType {
    return DuckDBVarIntType.instance;
  }

  public override get itemCount(): number {
    return this._itemCount;
  }

  /**
   * Returns the decoded value at `itemIndex`, or null when the item is marked
   * invalid or getStringBytes yields no bytes for its slot.
   */
  public override getItem(itemIndex: number): bigint | null {
    if (this.validity.itemValid(itemIndex)) {
      const encoded = getStringBytes(this.dataView, itemIndex * 16);
      if (encoded) {
        return getVarIntFromBytes(encoded);
      }
    }
    return null;
  }

  /** Returns a vector viewing `length` slots of the same buffer, starting at slot `offset`. */
  public override slice(offset: number, length: number): DuckDBVarIntVector {
    const byteStart = this.dataView.byteOffset + offset * 16;
    const window = new DataView(this.dataView.buffer, byteStart, length * 16);
    return new DuckDBVarIntVector(window, this.validity.slice(offset), length);
  }
}
32 changes: 8 additions & 24 deletions api/test/api.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ import {
DuckDBTimestampSecondsType,
DuckDBTimestampSecondsVector,
DuckDBTimestampTZType,
DuckDBTimestampTZVector,
DuckDBTimestampType,
DuckDBTimestampVector,
DuckDBTinyIntType,
Expand All @@ -85,6 +86,7 @@ import {
DuckDBVarCharType,
DuckDBVarCharVector,
DuckDBVarIntType,
DuckDBVarIntVector,
DuckDBVector,
configurationOptionDescriptions,
version
Expand Down Expand Up @@ -145,7 +147,7 @@ const MinTimeTZOffset = -MaxTimeTZOffset;
const MinTimeTZ = new DuckDBTimeTZValue(MinTimeTZMicroseconds, MaxTimeTZOffset);
const MaxTimeTZ = new DuckDBTimeTZValue(MaxTimeTZMicroseconds, MinTimeTZOffset);
const MinTS_S = BigInt(-9223372022400); // from test_all_types() select epoch(timestamp_s)::bigint;
const MaxTS_S = BigInt(9223372036854);
const MaxTS_S = BigInt( 9223372036854);
const MinTS_MS = MinTS_S * BI_1000;
const MaxTS_MS = (MaxInt64 - BI_1) / BI_1000;
const MinTS_US = MinTS_MS * BI_1000;
Expand All @@ -154,31 +156,13 @@ const TS_US_Inf = MaxInt64;
const MinTS_NS = -9223286400000000000n;
const MaxTS_NS = MaxInt64 - BI_1;
const MinFloat32 = Math.fround(-3.4028235e+38);
const MaxFloat32 = Math.fround(3.4028235e+38);
const MaxFloat32 = Math.fround( 3.4028235e+38);
const MinFloat64 = -Number.MAX_VALUE;
const MaxFloat64 = Number.MAX_VALUE;
const MinUUID = MinInt128;
const MaxUUID = MaxInt128;
const MinVarInt = new Uint8Array([0x7F, 0xFF, 0x7F,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
]);
const MaxVarInt = new Uint8Array([0x80, 0x00, 0x80,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
]);
const MinVarInt = -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368n
const MaxVarInt = 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368n;

async function sleep(ms: number): Promise<void> {
return new Promise((resolve) => {
Expand Down Expand Up @@ -544,15 +528,15 @@ describe('api', () => {
assertValues(chunk, 8, DuckDBUSmallIntVector, [MinUInt16, MaxUInt16, null]);
assertValues(chunk, 9, DuckDBUIntegerVector, [MinUInt32, MaxUInt32, null]);
assertValues(chunk, 10, DuckDBUBigIntVector, [MinUInt64, MaxUInt64, null]);
assertValues(chunk, 11, DuckDBBlobVector, [MinVarInt, MaxVarInt, null]);
assertValues(chunk, 11, DuckDBVarIntVector, [MinVarInt, MaxVarInt, null]);
assertValues(chunk, 12, DuckDBDateVector, [MinDate, MaxDate, null]);
assertValues(chunk, 13, DuckDBTimeVector, [MinTime, MaxTime, null]);
assertValues(chunk, 14, DuckDBTimestampVector, [MinTS_US, MaxTS_US, null]);
assertValues(chunk, 15, DuckDBTimestampSecondsVector, [MinTS_S, MaxTS_S, null]);
assertValues(chunk, 16, DuckDBTimestampMillisecondsVector, [MinTS_MS, MaxTS_MS, null]);
assertValues(chunk, 17, DuckDBTimestampNanosecondsVector, [MinTS_NS, MaxTS_NS, null]);
assertValues(chunk, 18, DuckDBTimeTZVector, [MinTimeTZ, MaxTimeTZ, null]);
assertValues(chunk, 19, DuckDBTimestampVector, [MinTS_US, MaxTS_US, null]);
assertValues(chunk, 19, DuckDBTimestampTZVector, [MinTS_US, MaxTS_US, null]);
assertValues(chunk, 20, DuckDBFloatVector, [MinFloat32, MaxFloat32, null]);
assertValues(chunk, 21, DuckDBDoubleVector, [MinFloat64, MaxFloat64, null]);
assertValues(chunk, 22, DuckDBDecimal2Vector, [
Expand Down

0 comments on commit 0219c0e

Please sign in to comment.