From e2ef4a6ad14668d92374c32b2ad3fe3a4b8cc0e5 Mon Sep 17 00:00:00 2001 From: Jeff Raymakers Date: Fri, 17 Jan 2025 17:36:10 -0800 Subject: [PATCH] row and column objects --- api/src/DuckDBDataChunk.ts | 69 +++++++++++++++++++++------ api/src/DuckDBResult.ts | 26 ++++++++++ api/src/DuckDBResultReader.ts | 19 ++++++-- api/src/getColumnsFromChunks.ts | 10 ++-- api/src/getColumnsObjectFromChunks.ts | 24 ++++++++++ api/src/getRowObjectsFromChunks.ts | 20 ++++++++ api/src/getRowsFromChunks.ts | 6 ++- api/test/api.test.ts | 20 ++++++++ 8 files changed, 172 insertions(+), 22 deletions(-) create mode 100644 api/src/getColumnsObjectFromChunks.ts create mode 100644 api/src/getRowObjectsFromChunks.ts diff --git a/api/src/DuckDBDataChunk.ts b/api/src/DuckDBDataChunk.ts index 860a00d1..fd5d633c 100644 --- a/api/src/DuckDBDataChunk.ts +++ b/api/src/DuckDBDataChunk.ts @@ -9,8 +9,13 @@ export class DuckDBDataChunk { constructor(chunk: duckdb.DataChunk) { this.chunk = chunk; } - public static create(types: readonly DuckDBType[], rowCount?: number): DuckDBDataChunk { - const chunk = new DuckDBDataChunk(duckdb.create_data_chunk(types.map(t => t.toLogicalType().logical_type))); + public static create( + types: readonly DuckDBType[], + rowCount?: number + ): DuckDBDataChunk { + const chunk = new DuckDBDataChunk( + duckdb.create_data_chunk(types.map((t) => t.toLogicalType().logical_type)) + ); if (rowCount != undefined) { chunk.rowCount = rowCount; } @@ -39,7 +44,14 @@ export class DuckDBDataChunk { this.vectors[columnIndex] = vector; return vector; } - public visitColumnValues(columnIndex: number, visitValue: (value: DuckDBValue, rowIndex?: number, columnIndex?: number) => void) { + public visitColumnValues( + columnIndex: number, + visitValue: ( + value: DuckDBValue, + rowIndex: number, + columnIndex: number + ) => void + ) { const vector = this.getColumnVector(columnIndex); for (let rowIndex = 0; rowIndex < vector.itemCount; rowIndex++) { visitValue(vector.getItem(rowIndex), rowIndex, columnIndex); @@ -47,7 +59,7 @@ export class DuckDBDataChunk { } public getColumnValues(columnIndex: number): DuckDBValue[] { const values: DuckDBValue[] = []; - this.visitColumnValues(columnIndex, value => values.push(value)); + this.visitColumnValues(columnIndex, (value) => values.push(value)); return values; } public setColumnValues(columnIndex: number, values: readonly DuckDBValue[]) { @@ -60,7 +72,9 @@ export class DuckDBDataChunk { } vector.flush(); } - public visitColumns(visitColumn: (column: DuckDBValue[], columnIndex?: number) => void) { + public visitColumns( + visitColumn: (column: DuckDBValue[], columnIndex: number) => void + ) { const columnCount = this.columnCount; for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) { visitColumn(this.getColumnValues(columnIndex), columnIndex); @@ -68,7 +82,7 @@ export class DuckDBDataChunk { } public getColumns(): DuckDBValue[][] { const columns: DuckDBValue[][] = []; - this.visitColumns(column => columns.push(column)); + this.visitColumns((column) => columns.push(column)); return columns; } public setColumns(columns: readonly (readonly DuckDBValue[])[]) { @@ -79,24 +93,41 @@ export class DuckDBDataChunk { this.setColumnValues(columnIndex, columns[columnIndex]); } } - public visitColumnMajor(visitValue: (value: DuckDBValue, rowIndex?: number, columnIndex?: number) => void) { + public visitColumnMajor( + visitValue: ( + value: DuckDBValue, + rowIndex: number, + columnIndex: number + ) => void + ) { const columnCount = this.columnCount; for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) { this.visitColumnValues(columnIndex, visitValue); } } - public visitRowValues(rowIndex: number, visitValue: (value: DuckDBValue, rowIndex?: number, columnIndex?: number) => void) { + public visitRowValues( + rowIndex: number, + visitValue: ( + value: DuckDBValue, + rowIndex: number, + columnIndex: number + ) => void + ) { const columnCount = this.columnCount; for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) { - visitValue(this.getColumnVector(columnIndex).getItem(rowIndex), rowIndex, columnIndex); + visitValue( + this.getColumnVector(columnIndex).getItem(rowIndex), + rowIndex, + columnIndex + ); } } public getRowValues(rowIndex: number): DuckDBValue[] { const values: DuckDBValue[] = []; - this.visitRowValues(rowIndex, value => values.push(value)); + this.visitRowValues(rowIndex, (value) => values.push(value)); return values; } - public visitRows(visitRow: (row: DuckDBValue[], rowIndex?: number) => void) { + public visitRows(visitRow: (row: DuckDBValue[], rowIndex: number) => void) { const rowCount = this.rowCount; for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) { visitRow(this.getRowValues(rowIndex), rowIndex); @@ -104,7 +135,7 @@ export class DuckDBDataChunk { } public getRows(): DuckDBValue[][] { const rows: DuckDBValue[][] = []; - this.visitRows(row => rows.push(row)); + this.visitRows((row) => rows.push(row)); return rows; } public setRows(rows: readonly (readonly DuckDBValue[])[]) { @@ -118,12 +149,22 @@ export class DuckDBDataChunk { vector.flush(); } } - public visitRowMajor(visitValue: (value: DuckDBValue, rowIndex?: number, columnIndex?: number) => void) { + public visitRowMajor( + visitValue: ( + value: DuckDBValue, + rowIndex: number, + columnIndex: number + ) => void + ) { const rowCount = this.rowCount; const columnCount = this.columnCount; for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) { for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) { - visitValue(this.getColumnVector(columnIndex).getItem(rowIndex), rowIndex, columnIndex); + visitValue( + this.getColumnVector(columnIndex).getItem(rowIndex), + rowIndex, + columnIndex + ); } } } diff --git a/api/src/DuckDBResult.ts b/api/src/DuckDBResult.ts index def4d720..a71a62eb 100644 --- a/api/src/DuckDBResult.ts +++ b/api/src/DuckDBResult.ts @@ -5,6 +5,8 @@ import { DuckDBType } from './DuckDBType'; import { DuckDBTypeId } from './DuckDBTypeId'; import { ResultReturnType, StatementType } from './enums'; import { getColumnsFromChunks } from './getColumnsFromChunks'; +import { getColumnsObjectFromChunks } from './getColumnsObjectFromChunks'; +import { getRowObjectsFromChunks } from './getRowObjectsFromChunks'; import { getRowsFromChunks } from './getRowsFromChunks'; import { DuckDBValue } from './values'; @@ -33,6 +35,22 @@ export class DuckDBResult { } return columnNames; } + public deduplicatedColumnNames(): string[] { + const outputColumnNames: string[] = []; + const columnCount = this.columnCount; + const columnNameCount: { [columnName: string]: number } = {}; + for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) { + const inputColumnName = this.columnName(columnIndex); + const nameCount = (columnNameCount[inputColumnName] || 0) + 1; + columnNameCount[inputColumnName] = nameCount; + if (nameCount > 1) { + outputColumnNames.push(`${inputColumnName}:${nameCount - 1}`); + } else { + outputColumnNames.push(inputColumnName); + } + } + return outputColumnNames; + } public columnTypeId(columnIndex: number): DuckDBTypeId { return duckdb.column_type( this.result, @@ -81,8 +99,16 @@ export class DuckDBResult { const chunks = await this.fetchAllChunks(); return getColumnsFromChunks(chunks); } + public async getColumnsObject(): Promise> { + const chunks = await this.fetchAllChunks(); + return getColumnsObjectFromChunks(chunks, this.deduplicatedColumnNames()); + } public async getRows(): Promise { const chunks = await this.fetchAllChunks(); return getRowsFromChunks(chunks); } + public async getRowObjects(): Promise[]> { + const chunks = await this.fetchAllChunks(); + return getRowObjectsFromChunks(chunks, this.deduplicatedColumnNames()); + } } diff --git a/api/src/DuckDBResultReader.ts b/api/src/DuckDBResultReader.ts index 14a0fb28..7bd2f87d 100644 --- a/api/src/DuckDBResultReader.ts +++ b/api/src/DuckDBResultReader.ts @@ -5,6 +5,8 @@ import { DuckDBType } from './DuckDBType'; import { DuckDBTypeId } from './DuckDBTypeId'; import { ResultReturnType, StatementType } from './enums'; import { getColumnsFromChunks } from './getColumnsFromChunks'; +import { getColumnsObjectFromChunks } from './getColumnsObjectFromChunks'; +import { getRowObjectsFromChunks } from './getRowObjectsFromChunks'; import { getRowsFromChunks } from './getRowsFromChunks'; import { DuckDBValue } from './values'; @@ -44,6 +46,9 @@ export class DuckDBResultReader { public columnNames(): string[] { return this.result.columnNames(); } + public deduplicatedColumnNames(): string[] { + return this.result.deduplicatedColumnNames(); + } public columnTypeId(columnIndex: number): DuckDBTypeId { return this.result.columnTypeId(columnIndex); } @@ -99,10 +104,10 @@ export class DuckDBResultReader { } // We didn't find our row. It must have been out of range. throw Error( - `Row index ${rowIndex} requested, but only ${this.currentRowCount_} row have been read so far.`, + `Row index ${rowIndex} requested, but only ${this.currentRowCount_} row have been read so far.` ); } - + /** Read all rows. */ public async readAll(): Promise { return this.fetchChunks(); @@ -121,7 +126,8 @@ export class DuckDBResultReader { while ( !( this.done_ || - (targetRowCount !== undefined && this.currentRowCount_ >= targetRowCount) + (targetRowCount !== undefined && + this.currentRowCount_ >= targetRowCount) ) ) { const chunk = await this.result.fetchChunk(); @@ -157,8 +163,15 @@ export class DuckDBResultReader { return getColumnsFromChunks(this.chunks); } + public getColumnsObject(): Record { + return getColumnsObjectFromChunks(this.chunks, this.deduplicatedColumnNames()); + } + public getRows(): DuckDBValue[][] { return getRowsFromChunks(this.chunks); } + public getRowObjecs(): Record[] { + return getRowObjectsFromChunks(this.chunks, this.deduplicatedColumnNames()); + } } diff --git a/api/src/getColumnsFromChunks.ts b/api/src/getColumnsFromChunks.ts index dda5c91d..346ec649 100644 --- a/api/src/getColumnsFromChunks.ts +++ b/api/src/getColumnsFromChunks.ts @@ -1,15 +1,19 @@ import { DuckDBDataChunk } from './DuckDBDataChunk'; import { DuckDBValue } from './values'; -export function getColumnsFromChunks(chunks: readonly DuckDBDataChunk[]): DuckDBValue[][] { +export function getColumnsFromChunks( + chunks: readonly DuckDBDataChunk[] +): DuckDBValue[][] { const columns: DuckDBValue[][] = []; if (chunks.length === 0) { return columns; } - chunks[0].visitColumns(column => columns.push(column)); + chunks[0].visitColumns((column) => columns.push(column)); for (let chunkIndex = 1; chunkIndex < chunks.length; chunkIndex++) { for (let columnIndex = 0; columnIndex < columns.length; columnIndex++) { - chunks[chunkIndex].visitColumnValues(columnIndex, value => columns[columnIndex].push(value)); + chunks[chunkIndex].visitColumnValues(columnIndex, (value) => + columns[columnIndex].push(value) + ); } } return columns; diff --git a/api/src/getColumnsObjectFromChunks.ts b/api/src/getColumnsObjectFromChunks.ts new file mode 100644 index 00000000..49ccb16d --- /dev/null +++ b/api/src/getColumnsObjectFromChunks.ts @@ -0,0 +1,24 @@ +import { DuckDBDataChunk } from './DuckDBDataChunk'; +import { DuckDBValue } from './values'; + +export function getColumnsObjectFromChunks( + chunks: readonly DuckDBDataChunk[], + columnNames: readonly string[], +): Record { + const columnsObject: Record = {}; + for (const columnName of columnNames) { + columnsObject[columnName] = []; + } + if (chunks.length === 0) { + return columnsObject; + } + const columnCount = chunks[0].columnCount; + for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) { + for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) { + chunks[chunkIndex].visitColumnValues(columnIndex, (value) => + columnsObject[columnNames[columnIndex]].push(value) + ); + } + } + return columnsObject; +} diff --git a/api/src/getRowObjectsFromChunks.ts b/api/src/getRowObjectsFromChunks.ts new file mode 100644 index 00000000..52e7a50f --- /dev/null +++ b/api/src/getRowObjectsFromChunks.ts @@ -0,0 +1,20 @@ +import { DuckDBDataChunk } from './DuckDBDataChunk'; +import { DuckDBValue } from './values'; + +export function getRowObjectsFromChunks( + chunks: readonly DuckDBDataChunk[], + columnNames: readonly string[] +): Record[] { + const rowObjects: Record[] = []; + for (const chunk of chunks) { + const rowCount = chunk.rowCount; + for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) { + const rowObject: Record = {}; + chunk.visitRowValues(rowIndex, (value, _, columnIndex) => { + rowObject[columnNames[columnIndex]] = value; + }); + rowObjects.push(rowObject); + } + } + return rowObjects; +} diff --git a/api/src/getRowsFromChunks.ts b/api/src/getRowsFromChunks.ts index 67df79a1..3d0453d2 100644 --- a/api/src/getRowsFromChunks.ts +++ b/api/src/getRowsFromChunks.ts @@ -1,10 +1,12 @@ import { DuckDBDataChunk } from './DuckDBDataChunk'; import { DuckDBValue } from './values'; -export function getRowsFromChunks(chunks: readonly DuckDBDataChunk[]): DuckDBValue[][] { +export function getRowsFromChunks( + chunks: readonly DuckDBDataChunk[] +): DuckDBValue[][] { const rows: DuckDBValue[][] = []; for (const chunk of chunks) { - chunk.visitRows(row => rows.push(row)); + chunk.visitRows((row) => rows.push(row)); } return rows; } diff --git a/api/test/api.test.ts b/api/test/api.test.ts index b128c494..bdd52deb 100644 --- a/api/test/api.test.ts +++ b/api/test/api.test.ts @@ -1028,6 +1028,26 @@ describe('api', () => { ]); }); }); + test('row and column objects', async () => { + await withConnection(async (connection) => { + const reader = await connection.runAndReadAll( + 'select i::int as a, i::int + 10 as b, (i + 100)::varchar as a from range(3) t(i)' + ); + assert.deepEqual(reader.columnNames(), ['a', 'b', 'a']); + assert.deepEqual(reader.deduplicatedColumnNames(), ['a', 'b', 'a:1']); + assert.deepEqual(reader.columnTypes(), [INTEGER, INTEGER, VARCHAR]); + assert.deepEqual(reader.getRowObjecs(), [ + { 'a': 0, 'b': 10, 'a:1': '100' }, + { 'a': 1, 'b': 11, 'a:1': '101' }, + { 'a': 2, 'b': 12, 'a:1': '102' }, + ]); + assert.deepEqual(reader.getColumnsObject(), { + 'a': [0, 1, 2], + 'b': [10, 11, 12], + 'a:1': ['100', '101', '102'], + }); + }); + }); test('result reader', async () => { await withConnection(async (connection) => { const reader = await connection.runAndReadAll(