Skip to content

Commit

Permalink
GH-14791: [JS] Fix BitmapBufferBuilder size truncation (#14881)
Browse files Browse the repository at this point in the history
* 730e9c5 updates `ts-jest` configuration to remove deprecation warnings
* e4d83f2 updates the `print-buffer-alignment.js` debug utility for the latest APIs
* 3b9d18c updates `arrow2csv` to print zero-based rowIds
* b6c42f3 fixes #14791


* Closes: #14791

Authored-by: ptaylor <[email protected]>
Signed-off-by: Dominik Moritz <[email protected]>
  • Loading branch information
trxcllnt authored Dec 9, 2022
1 parent d1a550c commit 01a30a8
Show file tree
Hide file tree
Showing 19 changed files with 358 additions and 188 deletions.
107 changes: 74 additions & 33 deletions js/bin/print-buffer-alignment.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,60 +22,101 @@
const fs = require('fs');
const path = require('path');
const extension = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : '.cjs';
const { VectorLoader } = require(`../targets/apache-arrow/visitor/vectorloader`);
const { RecordBatch, AsyncMessageReader, makeData, Struct, Schema, Field } = require(`../index${extension}`);

// Debug utility: reads Arrow messages from the file named by the first CLI
// argument (or from stdin when no argument is given) and logs, for every
// message, where its metadata and body buffers fall in the stream.
(async () => {

    const readable = process.argv.length < 3 ? process.stdin : fs.createReadStream(path.resolve(process.argv[2]));
    const reader = new AsyncMessageReader(readable);

    let schema, metadataLength, message;
    // Running total of the metadata and body bytes consumed so far.
    let byteOffset = 0;
    let recordBatchCount = 0;
    let dictionaryBatchCount = 0;

    while (true) {
        if ((metadataLength = (await reader.readMetadataLength())).done) { break; }
        // NOTE(review): -1 is presumably the IPC continuation marker, with the
        // real metadata length following it; confirm against the IPC format.
        if (metadataLength.value === -1) {
            if ((metadataLength = (await reader.readMetadataLength())).done) { break; }
        }
        if ((message = (await reader.readMetadata(metadataLength.value))).done) { break; }

        if (message.value.isSchema()) {
            console.log(
                `Schema:`,
                {
                    byteOffset,
                    metadataLength: metadataLength.value,
                });
            // Kept so later batches can be decoded against it.
            schema = message.value.header();
            byteOffset += metadataLength.value;
        } else if (message.value.isRecordBatch()) {
            const header = message.value.header();
            const bufferRegions = header.buffers;
            const body = await reader.readMessageBody(message.value.bodyLength);
            const recordBatch = loadRecordBatch(schema, header, body);
            console.log(
                `RecordBatch ${++recordBatchCount}:`,
                {
                    numRows: recordBatch.numRows,
                    byteOffset,
                    metadataLength: metadataLength.value,
                    bodyByteLength: body.byteLength,
                });
            // Advance past the metadata first so that the buffer offsets
            // printed below are measured from the start of the body.
            byteOffset += metadataLength.value;
            bufferRegions.forEach(({ offset, length: byteLength }, i) => {
                console.log(`\tbuffer ${i + 1}:`, { byteOffset: byteOffset + offset, byteLength });
            });
            byteOffset += body.byteLength;
        } else if (message.value.isDictionaryBatch()) {
            const header = message.value.header();
            const bufferRegions = header.data.buffers;
            const type = schema.dictionaries.get(header.id);
            const body = await reader.readMessageBody(message.value.bodyLength);
            const recordBatch = loadDictionaryBatch(header.data, body, type);
            console.log(
                `DictionaryBatch ${++dictionaryBatchCount}:`,
                {
                    id: header.id,
                    numRows: recordBatch.numRows,
                    byteOffset,
                    metadataLength: metadataLength.value,
                    bodyByteLength: body.byteLength,
                });
            // Same accounting as for record batches: metadata, then body.
            byteOffset += metadataLength.value;
            bufferRegions.forEach(({ offset, length: byteLength }, i) => {
                console.log(`\tbuffer ${i + 1}:`, { byteOffset: byteOffset + offset, byteLength });
            });
            byteOffset += body.byteLength;
        }
    }

    await reader.return();

})().catch((e) => { console.error(e); process.exit(1); });

/**
 * Builds a RecordBatch from a record-batch message header and its body.
 *
 * @param schema Schema whose `fields` are visited by the VectorLoader and
 *   wrapped in the batch's Struct type.
 * @param header Record-batch metadata; `length`, `nodes` and `buffers` are read.
 * @param body Message body handed to the VectorLoader.
 * @returns A RecordBatch over a Struct-typed Data of `header.length` rows.
 */
function loadRecordBatch(schema, header, body) {
    // One child Data per schema field, loaded from the message body.
    const children = new VectorLoader(body, header.nodes, header.buffers, new Map()).visitMany(schema.fields);
    return new RecordBatch(
        schema,
        makeData({
            type: new Struct(schema.fields),
            length: header.length,
            children,
        })
    );
}

/**
 * Builds a single-column RecordBatch holding the values of a dictionary batch.
 *
 * @param header Record-batch metadata nested in the dictionary message;
 *   `length`, `nodes` and `buffers` are read.
 * @param body Message body handed to the VectorLoader.
 * @param dictionaryType Type of the dictionary's values, used for the one field.
 * @returns A RecordBatch with one unnamed field of `dictionaryType`.
 */
function loadDictionaryBatch(header, body, dictionaryType) {
    // Wrap the dictionary values in a one-field schema with an empty name.
    const schema = new Schema([new Field('', dictionaryType)]);
    const children = new VectorLoader(body, header.nodes, header.buffers, new Map()).visitMany([dictionaryType]);
    return new RecordBatch(
        schema,
        makeData({
            type: new Struct(schema.fields),
            length: header.length,
            children,
        })
    );
}
74 changes: 41 additions & 33 deletions js/jest.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,38 +16,46 @@
// under the License.

// Base Jest configuration; the per-target configs spread this object and
// override individual keys.
export default {
    verbose: false,
    testEnvironment: "node",
    rootDir: ".",
    roots: [
        "<rootDir>/test/",
    ],
    extensionsToTreatAsEsm: [".ts"],
    moduleFileExtensions: ["js", "mjs", "ts"],
    coverageReporters: ["lcov", "json",],
    coveragePathIgnorePatterns: [
        "fb\\/.*\\.(js|ts)$",
        "test\\/.*\\.(ts|js)$",
        "/node_modules/",
    ],
    moduleNameMapper: {
        "^apache-arrow$": "<rootDir>/src/Arrow.node",
        "^apache-arrow(.*)": "<rootDir>/src$1",
        // Drop the ".js" suffix from relative import specifiers.
        "^(\\.{1,2}/.*)\\.js$": "$1",
    },
    testRegex: "(.*(-|\\.)(test|spec)s?)\\.(ts|js)$",
    // ts-jest options are given per transform entry rather than through the
    // deprecated `globals["ts-jest"]` block.
    transform: {
        "^.+\\.js$": [
            "ts-jest",
            {
                diagnostics: false,
                tsconfig: "test/tsconfig.json",
                useESM: true,
            },
        ],
        "^.+\\.ts$": [
            "ts-jest",
            {
                diagnostics: false,
                tsconfig: "test/tsconfig.json",
                useESM: true,
            },
        ],
    },
    transformIgnorePatterns: [
        "/targets/(es5|es2015|esnext|apache-arrow)/",
        "/node_modules/(?!@openpgp/web-stream-tools)/",
    ],
};
26 changes: 19 additions & 7 deletions js/jestconfigs/jest.apache-arrow.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,28 @@ import config from "../jest.config.js";
// Jest configuration for the `targets/apache-arrow` build: extends the base
// config and maps `apache-arrow` imports onto that target directory.
export default {
    ...config,
    rootDir: "../",
    moduleFileExtensions: ["js", "ts"],
    moduleNameMapper: {
        "^apache-arrow(.*)": "<rootDir>/targets/apache-arrow$1",
        // Drop the ".js" suffix from relative import specifiers.
        "^(\\.{1,2}/.*)\\.js$": "$1",
    },
    // ts-jest options are given per transform entry rather than through the
    // deprecated `globals["ts-jest"]` block.
    transform: {
        ...config.transform,
        "^.+\\.js$": [
            "ts-jest",
            {
                diagnostics: false,
                tsconfig: "<rootDir>/test/tsconfig/tsconfig.apache-arrow.json",
                useESM: true,
            },
        ],
        "^.+\\.ts$": [
            "ts-jest",
            {
                diagnostics: false,
                tsconfig: "<rootDir>/test/tsconfig/tsconfig.apache-arrow.json",
                useESM: true,
            },
        ],
    },
};
24 changes: 18 additions & 6 deletions js/jestconfigs/jest.coverage.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,23 @@ export default {
rootDir: "../",
collectCoverage: true,
reporters: undefined,
globals: {
"ts-jest": {
diagnostics: false,
tsconfig: "<rootDir>/test/tsconfig/tsconfig.coverage.json",
useESM: true,
},
transform: {
...config.transform,
"^.+\\.js$": [
"ts-jest",
{
diagnostics: false,
tsconfig: "<rootDir>/test/tsconfig/tsconfig.coverage.json",
useESM: true,
},
],
"^.+\\.ts$": [
"ts-jest",
{
diagnostics: false,
tsconfig: "<rootDir>/test/tsconfig/tsconfig.coverage.json",
useESM: true,
},
],
},
};
24 changes: 17 additions & 7 deletions js/jestconfigs/jest.es2015.cjs.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,26 @@ import config from "../jest.config.js";
// Jest configuration for the ES2015 CommonJS build: extends the base config
// and maps `apache-arrow` imports onto `targets/es2015/cjs`.
export default {
    ...config,
    rootDir: "../",
    moduleFileExtensions: ["js", "ts"],
    moduleNameMapper: {
        "^apache-arrow(.*)": "<rootDir>/targets/es2015/cjs$1",
        // Drop the ".js" suffix from relative import specifiers.
        "^(\\.{1,2}/.*)\\.js$": "$1",
    },
    // ts-jest options are given per transform entry rather than through the
    // deprecated `globals["ts-jest"]` block.
    transform: {
        ...config.transform,
        "^.+\\.js$": [
            "ts-jest",
            {
                diagnostics: false,
                tsconfig: "<rootDir>/test/tsconfig/tsconfig.es2015.cjs.json",
            },
        ],
        "^.+\\.ts$": [
            "ts-jest",
            {
                diagnostics: false,
                tsconfig: "<rootDir>/test/tsconfig/tsconfig.es2015.cjs.json",
            },
        ],
    },
};
26 changes: 19 additions & 7 deletions js/jestconfigs/jest.es2015.esm.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,28 @@ import config from "../jest.config.js";
// Jest configuration for the ES2015 ES-module build: extends the base config
// and maps `apache-arrow` imports onto `targets/es2015/esm`.
export default {
    ...config,
    rootDir: "../",
    moduleNameMapper: {
        "^apache-arrow(.*)": "<rootDir>/targets/es2015/esm$1",
        tslib: "tslib/tslib.es6.js",
        // Drop the ".js" suffix from relative import specifiers.
        "^(\\.{1,2}/.*)\\.js$": "$1",
    },
    // ts-jest options are given per transform entry rather than through the
    // deprecated `globals["ts-jest"]` block.
    transform: {
        ...config.transform,
        "^.+\\.js$": [
            "ts-jest",
            {
                diagnostics: false,
                tsconfig: "<rootDir>/test/tsconfig/tsconfig.es2015.esm.json",
                useESM: true,
            },
        ],
        "^.+\\.ts$": [
            "ts-jest",
            {
                diagnostics: false,
                tsconfig: "<rootDir>/test/tsconfig/tsconfig.es2015.esm.json",
                useESM: true,
            },
        ],
    },
};
24 changes: 17 additions & 7 deletions js/jestconfigs/jest.es2015.umd.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,26 @@ import config from "../jest.config.js";
// Jest configuration for the ES2015 UMD build: extends the base config and
// maps `apache-arrow` imports onto the single UMD bundle file.
export default {
    ...config,
    rootDir: "../",
    moduleFileExtensions: ["js", "ts"],
    moduleNameMapper: {
        "^apache-arrow": "<rootDir>/targets/es2015/umd/Arrow.js",
        // Drop the ".js" suffix from relative import specifiers.
        "^(\\.{1,2}/.*)\\.js$": "$1",
    },
    // ts-jest options are given per transform entry rather than through the
    // deprecated `globals["ts-jest"]` block.
    transform: {
        ...config.transform,
        "^.+\\.js$": [
            "ts-jest",
            {
                diagnostics: false,
                tsconfig: "<rootDir>/test/tsconfig/tsconfig.es2015.umd.json",
            },
        ],
        "^.+\\.ts$": [
            "ts-jest",
            {
                diagnostics: false,
                tsconfig: "<rootDir>/test/tsconfig/tsconfig.es2015.umd.json",
            },
        ],
    },
};
Loading

0 comments on commit 01a30a8

Please sign in to comment.