diff --git a/CHANGELOG.md b/CHANGELOG.md index 156f67f05..9ac85e0b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ This log is intended to keep track of backwards-incompatible changes, including but not limited to API changes and file location changes. Minor behavioral changes may not be included if they are not expected to break existing code. +## v0.18.0 + +* Browser scripts only expose `XLSX` variable + ## v0.17.4 * CLI script moved to `xlsx-cli` package diff --git a/Makefile b/Makefile index 639e3ad2f..4e4a38f5c 100644 --- a/Makefile +++ b/Makefile @@ -53,6 +53,10 @@ bits/01_version.js: package.json bits/18_cfb.js: node_modules/cfb/xlscfb.flow.js cp $^ $@ +bits/83_numbers.js: modules/83_numbers.js + cp $^ $@ + + .PHONY: clean clean: ## Remove targets and build artifacts rm -f $(TARGET) $(FLOWTARGET) diff --git a/bits/21_ziputils.js b/bits/21_ziputils.js index 0419b615b..fab7c4e86 100644 --- a/bits/21_ziputils.js +++ b/bits/21_ziputils.js @@ -53,6 +53,12 @@ function getzipstr(zip, file/*:string*/, safe/*:?boolean*/)/*:?string*/ { try { return getzipstr(zip, file); } catch(e) { return null; } } +function getzipbin(zip, file/*:string*/, safe/*:?boolean*/)/*:any*/ { + if(!safe) return getdatabin(getzipfile(zip, file)); + if(!file) return null; + try { return getzipbin(zip, file); } catch(e) { return null; } +} + function zipentries(zip) { var k = zip.FullPaths || keys(zip.files), o = []; for(var i = 0; i < k.length; ++i) if(k[i].slice(-1) != '/') o.push(k[i]); diff --git a/bits/83_numbers.js b/bits/83_numbers.js new file mode 100755 index 000000000..792ca7719 --- /dev/null +++ b/bits/83_numbers.js @@ -0,0 +1,507 @@ +var NUMBERS = (function() { + var __defProp = Object.defineProperty; + var __getOwnPropDesc = Object.getOwnPropertyDescriptor; + var __getOwnPropNames = Object.getOwnPropertyNames; + var __hasOwnProp = Object.prototype.hasOwnProperty; + var __markAsModule = function(target) { + return __defProp(target, "__esModule", { value: true }); + }; 
+ var __export = function(target, all) { + for (var name in all) + __defProp(target, name, { get: all[name], enumerable: true }); + }; + var __reExport = function(target, module, copyDefault, desc) { + if (module && typeof module === "object" || typeof module === "function") + for (var keys = __getOwnPropNames(module), i = 0, n = keys.length, key; i < n; i++) { + key = keys[i]; + if (!__hasOwnProp.call(target, key) && (copyDefault || key !== "default")) + __defProp(target, key, { get: function(k) { + return module[k]; + }.bind(null, key), enumerable: !(desc = __getOwnPropDesc(module, key)) || desc.enumerable }); + } + return target; + }; + var __toCommonJS = /* @__PURE__ */ function(cache) { + return function(module, temp) { + return cache && cache.get(module) || (temp = __reExport(__markAsModule({}), module, 1), cache && cache.set(module, temp), temp); + }; + }(typeof WeakMap !== "undefined" ? /* @__PURE__ */ new WeakMap() : 0); + + // 83_numbers.ts + var numbers_exports = {}; + __export(numbers_exports, { + parse_numbers: function() { + return numbers_default; + } + }); + + // src/util.ts + var u8_to_dataview = function(array) { + return new DataView(array.buffer, array.byteOffset, array.byteLength); + }; + var u8str = function(u8) { + return new TextDecoder().decode(u8); + }; + var u8concat = function(u8a) { + var len = u8a.reduce(function(acc, x) { + return acc + x.length; + }, 0); + var out = new Uint8Array(len); + var off = 0; + u8a.forEach(function(u8) { + out.set(u8, off); + off += u8.length; + }); + return out; + }; + + // src/proto.ts + function parse_varint49(buf, ptr) { + var l = ptr ? 
ptr[0] : 0; + var usz = buf[l] & 127; + varint: + if (buf[l++] >= 128) { + usz |= (buf[l] & 127) << 7; + if (buf[l++] < 128) + break varint; + usz |= (buf[l] & 127) << 14; + if (buf[l++] < 128) + break varint; + usz |= (buf[l] & 127) << 21; + if (buf[l++] < 128) + break varint; + usz += (buf[l] & 127) * Math.pow(2, 28); + ++l; + if (buf[l++] < 128) + break varint; + usz += (buf[l] & 127) * Math.pow(2, 35); + ++l; + if (buf[l++] < 128) + break varint; + usz += (buf[l] & 127) * Math.pow(2, 42); + ++l; + if (buf[l++] < 128) + break varint; + } + if (ptr) + ptr[0] = l; + return usz; + } + function varint_to_i32(buf) { + var l = 0, i32 = buf[l] & 127; + varint: + if (buf[l++] >= 128) { + i32 |= (buf[l] & 127) << 7; + if (buf[l++] < 128) + break varint; + i32 |= (buf[l] & 127) << 14; + if (buf[l++] < 128) + break varint; + i32 |= (buf[l] & 127) << 21; + if (buf[l++] < 128) + break varint; + i32 |= (buf[l] & 127) << 28; + } + return i32; + } + function parse_shallow(buf) { + var out = [], ptr = [0]; + while (ptr[0] < buf.length) { + var off = ptr[0]; + var num = parse_varint49(buf, ptr); + var type = num & 7; + num = Math.floor(num / 8); + var len = 0; + var res; + if (num == 0) + break; + switch (type) { + case 0: + { + var l = ptr[0]; + while (buf[ptr[0]++] >= 128) + ; + res = buf.slice(l, ptr[0]); + } + break; + case 5: + len = 4; + case 1: + if (!len) + len = 8; + case 2: + if (!len) + len = parse_varint49(buf, ptr); + res = buf.slice(ptr[0], ptr[0] + len); + ptr[0] += len; + break; + case 3: + case 4: + default: + throw new Error("PB Type ".concat(type, " for Field ").concat(num, " at offset ").concat(off)); + } + var v = { offset: off, data: res }; + if (out[num] == null) + out[num] = [v]; + else + out[num].push(v); + } + return out; + } + function mappa(data, cb) { + if (!data) + return []; + return data.map(function(d) { + var _a; + try { + return cb(d.data); + } catch (e) { + var m = (_a = e.message) == null ? 
void 0 : _a.match(/at offset (\d+)/); + if (m) + e.message = e.message.replace(/at offset (\d+)/, "at offset " + (+m[1] + d.offset)); + throw e; + } + }); + } + + // src/frame.ts + function deframe(buf) { + var out = []; + var l = 0; + while (l < buf.length) { + var t = buf[l++]; + var len = buf[l] | buf[l + 1] << 8 | buf[l + 2] << 16; + l += 3; + out.push(process_chunk(t, buf.slice(l, l + len))); + l += len; + } + if (l !== buf.length) + throw new Error("data is not a valid framed stream!"); + return u8concat(out); + } + function process_chunk(type, buf) { + if (type != 0) + throw new Error("Unexpected Snappy chunk type ".concat(type)); + var ptr = [0]; + var usz = parse_varint49(buf, ptr); + var chunks = []; + while (ptr[0] < buf.length) { + var tag = buf[ptr[0]] & 3; + if (tag == 0) { + var len = buf[ptr[0]++] >> 2; + if (len < 60) + ++len; + else { + var c = len - 59; + len = buf[ptr[0]]; + if (c > 1) + len |= buf[ptr[0] + 1] << 8; + if (c > 2) + len |= buf[ptr[0] + 2] << 16; + if (c > 3) + len |= buf[ptr[0] + 3] << 24; + len >>>= 0; + len++; + ptr[0] += c; + } + chunks.push(buf.slice(ptr[0], ptr[0] + len)); + ptr[0] += len; + continue; + } else { + var offset = 0, length = 0; + if (tag == 1) { + length = (buf[ptr[0]] >> 2 & 7) + 4; + offset = (buf[ptr[0]++] & 224) << 3; + offset |= buf[ptr[0]++]; + } else { + length = (buf[ptr[0]++] >> 2) + 1; + if (tag == 2) { + offset = buf[ptr[0]] | buf[ptr[0] + 1] << 8; + ptr[0] += 2; + } else { + offset = (buf[ptr[0]] | buf[ptr[0] + 1] << 8 | buf[ptr[0] + 2] << 16 | buf[ptr[0] + 3] << 24) >>> 0; + ptr[0] += 4; + } + } + chunks = [u8concat(chunks)]; + if (offset == 0) + throw new Error("Invalid offset 0"); + if (offset > chunks[0].length) + throw new Error("Invalid offset beyond length"); + if (length >= offset) { + chunks.push(chunks[0].slice(-offset)); + length -= offset; + while (length >= chunks[chunks.length - 1].length) { + chunks.push(chunks[chunks.length - 1]); + length -= chunks[chunks.length - 1].length; + } + } 
+ chunks.push(chunks[0].slice(-offset, -offset + length)); + } + } + var o = u8concat(chunks); + if (o.length != usz) + throw new Error("Unexpected length: ".concat(o.length, " != ").concat(usz)); + return o; + } + + // src/iwa.ts + function parse_iwa(buf) { + var out = [], ptr = [0]; + while (ptr[0] < buf.length) { + var len = parse_varint49(buf, ptr); + var ai = parse_shallow(buf.slice(ptr[0], ptr[0] + len)); + ptr[0] += len; + var res = { + id: varint_to_i32(ai[1][0].data), + messages: [] + }; + ai[2].forEach(function(b) { + var mi = parse_shallow(b.data); + var fl = varint_to_i32(mi[3][0].data); + res.messages.push({ + meta: mi, + data: buf.slice(ptr[0], ptr[0] + fl) + }); + ptr[0] += fl; + }); + out.push(res); + } + return out; + } + + // src/numbers.ts + var encode_col = function(C) { + var s = ""; + for (++C; C; C = Math.floor((C - 1) / 26)) + s = String.fromCharCode((C - 1) % 26 + 65) + s; + return s; + }; + var encode_cell = function(c) { + return "".concat(encode_col(c.c)).concat(c.r + 1); + }; + var encode_range = function(r) { + return encode_cell(r.s) + ":" + encode_cell(r.e); + }; + var book_new = function() { + return { Sheets: {}, SheetNames: [] }; + }; + var book_append_sheet = function(wb, ws, name) { + if (!name) { + for (var i = 1; i < 9999; ++i) + if (wb.SheetNames.indexOf(name = "Sheet ".concat(i)) == -1) + break; + } + wb.SheetNames.push(name); + wb.Sheets[name] = ws; + }; + function parse_numbers(cfb) { + var out = []; + cfb.FileIndex.forEach(function(s) { + if (!s.name.match(/\.iwa$/)) + return; + var o; + try { + o = deframe(s.content); + } catch (e) { + return console.log("?? 
" + s.content.length + " " + (e.message || e)); + } + var packets; + try { + packets = parse_iwa(o); + } catch (e) { + return console.log("## " + (e.message || e)); + } + packets.forEach(function(packet) { + out[+packet.id] = packet.messages; + }); + }); + if (!out.length) + throw new Error("File has no messages"); + var docroot; + out.forEach(function(iwams) { + iwams.forEach(function(iwam) { + var mtype = varint_to_i32(iwam.meta[1][0].data) >>> 0; + if (mtype == 1) { + if (!docroot) + docroot = iwam; + else + throw new Error("Document has multiple roots"); + } + }); + }); + if (!docroot) + throw new Error("Cannot find Document root"); + return parse_docroot(out, docroot); + } + var numbers_default = parse_numbers; + function parse_Reference(buf) { + var pb = parse_shallow(buf); + return parse_varint49(pb[1][0].data); + } + function parse_TST_TableDataList(M, root) { + var pb = parse_shallow(root.data); + var entries = pb[3]; + var data = []; + entries == null ? void 0 : entries.forEach(function(entry) { + var le = parse_shallow(entry.data); + var key = varint_to_i32(le[1][0].data) >>> 0; + data[key] = u8str(le[3][0].data); + }); + return data; + } + function parse_TST_TileRowInfo(u8) { + var pb = parse_shallow(u8); + var R = varint_to_i32(pb[1][0].data) >>> 0; + var storage = pb[3][0].data; + var offsets = u8_to_dataview(pb[4][0].data); + var cells = []; + for (var C = 0; C < offsets.byteLength / 2; ++C) { + var off = offsets.getUint16(C * 2, true); + if (off > storage.length) + continue; + cells[C] = storage.subarray(off, offsets.getUint16(C * 2 + 2, true)); + } + return { R: R, cells: cells }; + } + function parse_TST_Tile(M, root) { + var pb = parse_shallow(root.data); + var ri = mappa(pb[5], parse_TST_TileRowInfo); + return ri.reduce(function(acc, x) { + if (!acc[x.R]) + acc[x.R] = []; + x.cells.forEach(function(cell, C) { + if (acc[x.R][C]) + throw new Error("Duplicate cell r=".concat(x.R, " c=").concat(C)); + acc[x.R][C] = cell; + }); + return acc; + }, 
[]); + } + function parse_TST_TableModelArchive(M, root, ws) { + var pb = parse_shallow(root.data); + var range = { s: { r: 0, c: 0 }, e: { r: 0, c: 0 } }; + range.e.r = (varint_to_i32(pb[6][0].data) >>> 0) - 1; + if (range.e.r < 0) + throw new Error("Invalid row varint ".concat(pb[6][0].data)); + range.e.c = (varint_to_i32(pb[7][0].data) >>> 0) - 1; + if (range.e.c < 0) + throw new Error("Invalid col varint ".concat(pb[7][0].data)); + ws["!ref"] = encode_range(range); + { + var store = parse_shallow(pb[4][0].data); + var sst = parse_TST_TableDataList(M, M[parse_Reference(store[4][0].data)][0]); + { + var tile = parse_shallow(store[3][0].data); + var tiles = []; + tile[1].forEach(function(t) { + var tl = parse_shallow(t.data); + var ref = M[parse_Reference(tl[2][0].data)][0]; + var mtype = varint_to_i32(ref.meta[1][0].data); + if (mtype != 6002) + throw new Error("6001 unexpected reference to ".concat(mtype)); + tiles.push({ id: varint_to_i32(tl[1][0].data), ref: parse_TST_Tile(M, ref) }); + }); + tiles.forEach(function(tile2) { + tile2.ref.forEach(function(row, R) { + row.forEach(function(buf, C) { + var dv = u8_to_dataview(buf); + var ctype = buf[2]; + var addr = encode_cell({ r: R, c: C }); + switch (ctype) { + case 0: + { + switch (buf[1]) { + case 3: + ws[addr] = { t: "s", v: sst[dv.getUint32(buf.length - 4, true)] }; + break; + case 2: + ws[addr] = { t: "n", v: dv.getFloat64(16, true) }; + break; + case 0: + break; + case 5: + break; + case 7: + break; + case 6: + ws[addr] = { t: "b", v: dv.getFloat64(buf.length - 8, true) > 0 }; + break; + default: + throw new Error("Unsupported cell type ".concat(buf.slice(0, 4))); + } + } + break; + case 3: + { + ws[addr] = { t: "s", v: sst[dv.getUint32(16, true)] }; + } + break; + case 2: + { + ws[addr] = { t: "n", v: dv.getFloat64(buf.length - 12, true) }; + } + break; + case 6: + { + ws[addr] = { t: "b", v: dv.getFloat64(16, true) > 0 }; + } + break; + default: + throw new Error("Unsupported cell type ".concat(ctype)); 
+ } + }); + }); + }); + } + } + } + function parse_TST_TableInfoArchive(M, root) { + var pb = parse_shallow(root.data); + var out = { "!ref": "A1" }; + var tableref = M[parse_Reference(pb[2][0].data)]; + var mtype = varint_to_i32(tableref[0].meta[1][0].data); + if (mtype != 6001) + throw new Error("6000 unexpected reference to ".concat(mtype)); + parse_TST_TableModelArchive(M, tableref[0], out); + return out; + } + function parse_sheetroot(M, root) { + var _a; + var pb = parse_shallow(root.data); + var out = [{ "!ref": "A1" }, ((_a = pb[1]) == null ? void 0 : _a[0]) ? u8str(pb[1][0].data) : ""]; + var shapeoffs = mappa(pb[2], parse_Reference); + var seen = false; + shapeoffs.forEach(function(off) { + M[off].forEach(function(m) { + var mtype = varint_to_i32(m.meta[1][0].data); + if (mtype == 6e3) { + if (seen) + return; + out[0] = parse_TST_TableInfoArchive(M, m); + seen = true; + } + }); + }); + return out; + } + function parse_docroot(M, root) { + var out = book_new(); + var pb = parse_shallow(root.data); + var sheetoffs = mappa(pb[1], parse_Reference); + sheetoffs.forEach(function(off) { + M[off].forEach(function(m) { + var mtype = varint_to_i32(m.meta[1][0].data); + if (mtype == 2) { + var root2 = parse_sheetroot(M, m); + book_append_sheet(out, root2[0], root2[1]); + } + }); + }); + if (out.SheetNames.length == 0) + throw new Error("Empty NUMBERS file"); + return out; + } + return __toCommonJS(numbers_exports); +})(); +/*! 
sheetjs (C) 2013-present SheetJS -- http://sheetjs.com */ diff --git a/bits/85_parsezip.js b/bits/85_parsezip.js index 2b7807255..55c848d97 100644 --- a/bits/85_parsezip.js +++ b/bits/85_parsezip.js @@ -60,7 +60,16 @@ function parse_zip(zip/*:ZIP*/, opts/*:?ParseOpts*/)/*:Workbook*/ { /* UOC */ if(safegetzipfile(zip, 'objectdata.xml')) return parse_ods(zip, opts); /* Numbers */ - if(safegetzipfile(zip, 'Index/Document.iwa')) throw new Error('Unsupported NUMBERS file'); + if(safegetzipfile(zip, 'Index/Document.iwa')) { + if(typeof Uint8Array == "undefined") throw new Error('NUMBERS file parsing requires Uint8Array support') + if(typeof NUMBERS != "undefined") { + if(zip.FileIndex) return NUMBERS.parse_numbers(zip); + var _zip = CFB.utils.cfb_new(); + zipentries(zip).forEach(function(e) { zip_add_file(_zip, e, getzipbin(zip, e)); }); + return NUMBERS.parse_numbers(_zip); + } + throw new Error('Unsupported NUMBERS file'); + } if(!safegetzipfile(zip, '[Content_Types].xml')) { if(safegetzipfile(zip, 'index.xml.gz')) throw new Error('Unsupported NUMBERS 08 file'); if(safegetzipfile(zip, 'index.xml')) throw new Error('Unsupported NUMBERS 09 file'); diff --git a/bits/99_footer.js b/bits/99_footer.js index dc20acb70..06c6eb8a4 100644 --- a/bits/99_footer.js +++ b/bits/99_footer.js @@ -7,5 +7,3 @@ else if(typeof define === 'function' && define.amd) define('xlsx', function() { else make_xlsx_lib(XLSX); /* NOTE: the following extra line is needed for "Lightning Locker Service" */ if(typeof window !== 'undefined' && !window.XLSX) try { window.XLSX = XLSX; } catch(e) {} -/*exported XLS, ODS */ -var XLS = XLSX, ODS = XLSX; diff --git a/modules/.eslintrc b/modules/.eslintrc new file mode 100644 index 000000000..08095a4fb --- /dev/null +++ b/modules/.eslintrc @@ -0,0 +1,10 @@ +{ + "parser": "@typescript-eslint/parser", + "env": { "shared-node-browser":true }, + "plugins": ["@typescript-eslint"], + "extends": ["plugin:@typescript-eslint/recommended"], + "rules": { + "no-var": 0, + 
"semi": [ 2, "always" ] + } +} diff --git a/modules/.gitignore b/modules/.gitignore new file mode 100644 index 000000000..8d31954f5 --- /dev/null +++ b/modules/.gitignore @@ -0,0 +1,2 @@ +test_files +numbers_to_csv.node.js diff --git a/modules/83_numbers.js b/modules/83_numbers.js new file mode 100644 index 000000000..792ca7719 --- /dev/null +++ b/modules/83_numbers.js @@ -0,0 +1,507 @@ +var NUMBERS = (function() { + var __defProp = Object.defineProperty; + var __getOwnPropDesc = Object.getOwnPropertyDescriptor; + var __getOwnPropNames = Object.getOwnPropertyNames; + var __hasOwnProp = Object.prototype.hasOwnProperty; + var __markAsModule = function(target) { + return __defProp(target, "__esModule", { value: true }); + }; + var __export = function(target, all) { + for (var name in all) + __defProp(target, name, { get: all[name], enumerable: true }); + }; + var __reExport = function(target, module, copyDefault, desc) { + if (module && typeof module === "object" || typeof module === "function") + for (var keys = __getOwnPropNames(module), i = 0, n = keys.length, key; i < n; i++) { + key = keys[i]; + if (!__hasOwnProp.call(target, key) && (copyDefault || key !== "default")) + __defProp(target, key, { get: function(k) { + return module[k]; + }.bind(null, key), enumerable: !(desc = __getOwnPropDesc(module, key)) || desc.enumerable }); + } + return target; + }; + var __toCommonJS = /* @__PURE__ */ function(cache) { + return function(module, temp) { + return cache && cache.get(module) || (temp = __reExport(__markAsModule({}), module, 1), cache && cache.set(module, temp), temp); + }; + }(typeof WeakMap !== "undefined" ? 
/* @__PURE__ */ new WeakMap() : 0); + + // 83_numbers.ts + var numbers_exports = {}; + __export(numbers_exports, { + parse_numbers: function() { + return numbers_default; + } + }); + + // src/util.ts + var u8_to_dataview = function(array) { + return new DataView(array.buffer, array.byteOffset, array.byteLength); + }; + var u8str = function(u8) { + return new TextDecoder().decode(u8); + }; + var u8concat = function(u8a) { + var len = u8a.reduce(function(acc, x) { + return acc + x.length; + }, 0); + var out = new Uint8Array(len); + var off = 0; + u8a.forEach(function(u8) { + out.set(u8, off); + off += u8.length; + }); + return out; + }; + + // src/proto.ts + function parse_varint49(buf, ptr) { + var l = ptr ? ptr[0] : 0; + var usz = buf[l] & 127; + varint: + if (buf[l++] >= 128) { + usz |= (buf[l] & 127) << 7; + if (buf[l++] < 128) + break varint; + usz |= (buf[l] & 127) << 14; + if (buf[l++] < 128) + break varint; + usz |= (buf[l] & 127) << 21; + if (buf[l++] < 128) + break varint; + usz += (buf[l] & 127) * Math.pow(2, 28); + ++l; + if (buf[l++] < 128) + break varint; + usz += (buf[l] & 127) * Math.pow(2, 35); + ++l; + if (buf[l++] < 128) + break varint; + usz += (buf[l] & 127) * Math.pow(2, 42); + ++l; + if (buf[l++] < 128) + break varint; + } + if (ptr) + ptr[0] = l; + return usz; + } + function varint_to_i32(buf) { + var l = 0, i32 = buf[l] & 127; + varint: + if (buf[l++] >= 128) { + i32 |= (buf[l] & 127) << 7; + if (buf[l++] < 128) + break varint; + i32 |= (buf[l] & 127) << 14; + if (buf[l++] < 128) + break varint; + i32 |= (buf[l] & 127) << 21; + if (buf[l++] < 128) + break varint; + i32 |= (buf[l] & 127) << 28; + } + return i32; + } + function parse_shallow(buf) { + var out = [], ptr = [0]; + while (ptr[0] < buf.length) { + var off = ptr[0]; + var num = parse_varint49(buf, ptr); + var type = num & 7; + num = Math.floor(num / 8); + var len = 0; + var res; + if (num == 0) + break; + switch (type) { + case 0: + { + var l = ptr[0]; + while (buf[ptr[0]++] >= 128) + 
; + res = buf.slice(l, ptr[0]); + } + break; + case 5: + len = 4; + case 1: + if (!len) + len = 8; + case 2: + if (!len) + len = parse_varint49(buf, ptr); + res = buf.slice(ptr[0], ptr[0] + len); + ptr[0] += len; + break; + case 3: + case 4: + default: + throw new Error("PB Type ".concat(type, " for Field ").concat(num, " at offset ").concat(off)); + } + var v = { offset: off, data: res }; + if (out[num] == null) + out[num] = [v]; + else + out[num].push(v); + } + return out; + } + function mappa(data, cb) { + if (!data) + return []; + return data.map(function(d) { + var _a; + try { + return cb(d.data); + } catch (e) { + var m = (_a = e.message) == null ? void 0 : _a.match(/at offset (\d+)/); + if (m) + e.message = e.message.replace(/at offset (\d+)/, "at offset " + (+m[1] + d.offset)); + throw e; + } + }); + } + + // src/frame.ts + function deframe(buf) { + var out = []; + var l = 0; + while (l < buf.length) { + var t = buf[l++]; + var len = buf[l] | buf[l + 1] << 8 | buf[l + 2] << 16; + l += 3; + out.push(process_chunk(t, buf.slice(l, l + len))); + l += len; + } + if (l !== buf.length) + throw new Error("data is not a valid framed stream!"); + return u8concat(out); + } + function process_chunk(type, buf) { + if (type != 0) + throw new Error("Unexpected Snappy chunk type ".concat(type)); + var ptr = [0]; + var usz = parse_varint49(buf, ptr); + var chunks = []; + while (ptr[0] < buf.length) { + var tag = buf[ptr[0]] & 3; + if (tag == 0) { + var len = buf[ptr[0]++] >> 2; + if (len < 60) + ++len; + else { + var c = len - 59; + len = buf[ptr[0]]; + if (c > 1) + len |= buf[ptr[0] + 1] << 8; + if (c > 2) + len |= buf[ptr[0] + 2] << 16; + if (c > 3) + len |= buf[ptr[0] + 3] << 24; + len >>>= 0; + len++; + ptr[0] += c; + } + chunks.push(buf.slice(ptr[0], ptr[0] + len)); + ptr[0] += len; + continue; + } else { + var offset = 0, length = 0; + if (tag == 1) { + length = (buf[ptr[0]] >> 2 & 7) + 4; + offset = (buf[ptr[0]++] & 224) << 3; + offset |= buf[ptr[0]++]; + } else { + 
length = (buf[ptr[0]++] >> 2) + 1; + if (tag == 2) { + offset = buf[ptr[0]] | buf[ptr[0] + 1] << 8; + ptr[0] += 2; + } else { + offset = (buf[ptr[0]] | buf[ptr[0] + 1] << 8 | buf[ptr[0] + 2] << 16 | buf[ptr[0] + 3] << 24) >>> 0; + ptr[0] += 4; + } + } + chunks = [u8concat(chunks)]; + if (offset == 0) + throw new Error("Invalid offset 0"); + if (offset > chunks[0].length) + throw new Error("Invalid offset beyond length"); + if (length >= offset) { + chunks.push(chunks[0].slice(-offset)); + length -= offset; + while (length >= chunks[chunks.length - 1].length) { + chunks.push(chunks[chunks.length - 1]); + length -= chunks[chunks.length - 1].length; + } + } + chunks.push(chunks[0].slice(-offset, -offset + length)); + } + } + var o = u8concat(chunks); + if (o.length != usz) + throw new Error("Unexpected length: ".concat(o.length, " != ").concat(usz)); + return o; + } + + // src/iwa.ts + function parse_iwa(buf) { + var out = [], ptr = [0]; + while (ptr[0] < buf.length) { + var len = parse_varint49(buf, ptr); + var ai = parse_shallow(buf.slice(ptr[0], ptr[0] + len)); + ptr[0] += len; + var res = { + id: varint_to_i32(ai[1][0].data), + messages: [] + }; + ai[2].forEach(function(b) { + var mi = parse_shallow(b.data); + var fl = varint_to_i32(mi[3][0].data); + res.messages.push({ + meta: mi, + data: buf.slice(ptr[0], ptr[0] + fl) + }); + ptr[0] += fl; + }); + out.push(res); + } + return out; + } + + // src/numbers.ts + var encode_col = function(C) { + var s = ""; + for (++C; C; C = Math.floor((C - 1) / 26)) + s = String.fromCharCode((C - 1) % 26 + 65) + s; + return s; + }; + var encode_cell = function(c) { + return "".concat(encode_col(c.c)).concat(c.r + 1); + }; + var encode_range = function(r) { + return encode_cell(r.s) + ":" + encode_cell(r.e); + }; + var book_new = function() { + return { Sheets: {}, SheetNames: [] }; + }; + var book_append_sheet = function(wb, ws, name) { + if (!name) { + for (var i = 1; i < 9999; ++i) + if (wb.SheetNames.indexOf(name = "Sheet 
".concat(i)) == -1) + break; + } + wb.SheetNames.push(name); + wb.Sheets[name] = ws; + }; + function parse_numbers(cfb) { + var out = []; + cfb.FileIndex.forEach(function(s) { + if (!s.name.match(/\.iwa$/)) + return; + var o; + try { + o = deframe(s.content); + } catch (e) { + return console.log("?? " + s.content.length + " " + (e.message || e)); + } + var packets; + try { + packets = parse_iwa(o); + } catch (e) { + return console.log("## " + (e.message || e)); + } + packets.forEach(function(packet) { + out[+packet.id] = packet.messages; + }); + }); + if (!out.length) + throw new Error("File has no messages"); + var docroot; + out.forEach(function(iwams) { + iwams.forEach(function(iwam) { + var mtype = varint_to_i32(iwam.meta[1][0].data) >>> 0; + if (mtype == 1) { + if (!docroot) + docroot = iwam; + else + throw new Error("Document has multiple roots"); + } + }); + }); + if (!docroot) + throw new Error("Cannot find Document root"); + return parse_docroot(out, docroot); + } + var numbers_default = parse_numbers; + function parse_Reference(buf) { + var pb = parse_shallow(buf); + return parse_varint49(pb[1][0].data); + } + function parse_TST_TableDataList(M, root) { + var pb = parse_shallow(root.data); + var entries = pb[3]; + var data = []; + entries == null ? 
void 0 : entries.forEach(function(entry) { + var le = parse_shallow(entry.data); + var key = varint_to_i32(le[1][0].data) >>> 0; + data[key] = u8str(le[3][0].data); + }); + return data; + } + function parse_TST_TileRowInfo(u8) { + var pb = parse_shallow(u8); + var R = varint_to_i32(pb[1][0].data) >>> 0; + var storage = pb[3][0].data; + var offsets = u8_to_dataview(pb[4][0].data); + var cells = []; + for (var C = 0; C < offsets.byteLength / 2; ++C) { + var off = offsets.getUint16(C * 2, true); + if (off > storage.length) + continue; + cells[C] = storage.subarray(off, offsets.getUint16(C * 2 + 2, true)); + } + return { R: R, cells: cells }; + } + function parse_TST_Tile(M, root) { + var pb = parse_shallow(root.data); + var ri = mappa(pb[5], parse_TST_TileRowInfo); + return ri.reduce(function(acc, x) { + if (!acc[x.R]) + acc[x.R] = []; + x.cells.forEach(function(cell, C) { + if (acc[x.R][C]) + throw new Error("Duplicate cell r=".concat(x.R, " c=").concat(C)); + acc[x.R][C] = cell; + }); + return acc; + }, []); + } + function parse_TST_TableModelArchive(M, root, ws) { + var pb = parse_shallow(root.data); + var range = { s: { r: 0, c: 0 }, e: { r: 0, c: 0 } }; + range.e.r = (varint_to_i32(pb[6][0].data) >>> 0) - 1; + if (range.e.r < 0) + throw new Error("Invalid row varint ".concat(pb[6][0].data)); + range.e.c = (varint_to_i32(pb[7][0].data) >>> 0) - 1; + if (range.e.c < 0) + throw new Error("Invalid col varint ".concat(pb[7][0].data)); + ws["!ref"] = encode_range(range); + { + var store = parse_shallow(pb[4][0].data); + var sst = parse_TST_TableDataList(M, M[parse_Reference(store[4][0].data)][0]); + { + var tile = parse_shallow(store[3][0].data); + var tiles = []; + tile[1].forEach(function(t) { + var tl = parse_shallow(t.data); + var ref = M[parse_Reference(tl[2][0].data)][0]; + var mtype = varint_to_i32(ref.meta[1][0].data); + if (mtype != 6002) + throw new Error("6001 unexpected reference to ".concat(mtype)); + tiles.push({ id: varint_to_i32(tl[1][0].data), ref: 
parse_TST_Tile(M, ref) }); + }); + tiles.forEach(function(tile2) { + tile2.ref.forEach(function(row, R) { + row.forEach(function(buf, C) { + var dv = u8_to_dataview(buf); + var ctype = buf[2]; + var addr = encode_cell({ r: R, c: C }); + switch (ctype) { + case 0: + { + switch (buf[1]) { + case 3: + ws[addr] = { t: "s", v: sst[dv.getUint32(buf.length - 4, true)] }; + break; + case 2: + ws[addr] = { t: "n", v: dv.getFloat64(16, true) }; + break; + case 0: + break; + case 5: + break; + case 7: + break; + case 6: + ws[addr] = { t: "b", v: dv.getFloat64(buf.length - 8, true) > 0 }; + break; + default: + throw new Error("Unsupported cell type ".concat(buf.slice(0, 4))); + } + } + break; + case 3: + { + ws[addr] = { t: "s", v: sst[dv.getUint32(16, true)] }; + } + break; + case 2: + { + ws[addr] = { t: "n", v: dv.getFloat64(buf.length - 12, true) }; + } + break; + case 6: + { + ws[addr] = { t: "b", v: dv.getFloat64(16, true) > 0 }; + } + break; + default: + throw new Error("Unsupported cell type ".concat(ctype)); + } + }); + }); + }); + } + } + } + function parse_TST_TableInfoArchive(M, root) { + var pb = parse_shallow(root.data); + var out = { "!ref": "A1" }; + var tableref = M[parse_Reference(pb[2][0].data)]; + var mtype = varint_to_i32(tableref[0].meta[1][0].data); + if (mtype != 6001) + throw new Error("6000 unexpected reference to ".concat(mtype)); + parse_TST_TableModelArchive(M, tableref[0], out); + return out; + } + function parse_sheetroot(M, root) { + var _a; + var pb = parse_shallow(root.data); + var out = [{ "!ref": "A1" }, ((_a = pb[1]) == null ? void 0 : _a[0]) ? 
u8str(pb[1][0].data) : ""]; + var shapeoffs = mappa(pb[2], parse_Reference); + var seen = false; + shapeoffs.forEach(function(off) { + M[off].forEach(function(m) { + var mtype = varint_to_i32(m.meta[1][0].data); + if (mtype == 6e3) { + if (seen) + return; + out[0] = parse_TST_TableInfoArchive(M, m); + seen = true; + } + }); + }); + return out; + } + function parse_docroot(M, root) { + var out = book_new(); + var pb = parse_shallow(root.data); + var sheetoffs = mappa(pb[1], parse_Reference); + sheetoffs.forEach(function(off) { + M[off].forEach(function(m) { + var mtype = varint_to_i32(m.meta[1][0].data); + if (mtype == 2) { + var root2 = parse_sheetroot(M, m); + book_append_sheet(out, root2[0], root2[1]); + } + }); + }); + if (out.SheetNames.length == 0) + throw new Error("Empty NUMBERS file"); + return out; + } + return __toCommonJS(numbers_exports); +})(); +/*! sheetjs (C) 2013-present SheetJS -- http://sheetjs.com */ diff --git a/modules/83_numbers.ts b/modules/83_numbers.ts new file mode 100644 index 000000000..27dcbd3cb --- /dev/null +++ b/modules/83_numbers.ts @@ -0,0 +1,4 @@ +/*! 
sheetjs (C) 2013-present SheetJS -- http://sheetjs.com */
+import parse_numbers from './src/numbers';
+
+export { parse_numbers };
\ No newline at end of file
diff --git a/modules/Makefile b/modules/Makefile
new file mode 100644
index 000000000..eb9cba203
--- /dev/null
+++ b/modules/Makefile
@@ -0,0 +1,20 @@
+LIBFILES=$(wildcard src/*.ts)
+TSFILES=$(wildcard *.ts)
+ENTRIES=$(subst .ts,.js,$(TSFILES))
+# Default goal: build one .js file for every top-level .ts entry point.
+.PHONY: all
+all: $(ENTRIES)
+# Main parser bundle: an ES5 IIFE that defines the global variable NUMBERS.
+83_numbers.js: 83_numbers.ts $(LIBFILES)
+	npx esbuild $< --bundle --outfile=$@ --platform=browser --format=iife --global-name=NUMBERS --target=es5
+# Node helpers: bundle for node, then rewrite ts-node -> node in the output (-i.bak is accepted by both GNU and BSD sed).
+%.node.js: %.node.ts $(LIBFILES)
+	npx esbuild $< --bundle --external:xlsx --outfile=$@ --minify --platform=node
+	sed -i.bak 's/ts-node/node/g' $@ && rm -f $@.bak
+# Any other entry point: ES5 IIFE bundle whose global is named after the file stem ($*).
+%.js: %.ts $(LIBFILES)
+	npx esbuild $< --bundle --outfile=$@ --platform=browser --format=iife --global-name=$* --target=es5
+# -f keeps clean from failing when an entry has not been built yet.
+.PHONY: clean
+clean:
+	rm -f $(ENTRIES)
diff --git a/modules/numbers_to_csv.node.ts b/modules/numbers_to_csv.node.ts
new file mode 100644
index 000000000..69d2e2d32
--- /dev/null
+++ b/modules/numbers_to_csv.node.ts
@@ -0,0 +1,13 @@
+#!/usr/bin/env ts-node
+/*! sheetjs (C) 2013-present SheetJS -- http://sheetjs.com */
+import { read } from 'cfb';
+import { utils } from 'xlsx';
+import parse_numbers from './src/numbers';
+/* usage: numbers_to_csv.node.ts <file> [sheet name | sheet index] -- prints one worksheet as CSV on stdout */
+var f = process.argv[2];
+var cfb = read(f, {type: "file"});
+var wb = parse_numbers(cfb);
+var sn = process.argv[3];
+if(sn && !isNaN(+sn)) sn = wb.SheetNames[+sn]; // numeric argument: use it as an index into SheetNames
+if(wb.SheetNames.indexOf(sn) == -1) sn = wb.SheetNames[0]; // missing or unknown name: fall back to the first sheet
+console.log(utils.sheet_to_csv(wb.Sheets[sn]));
\ No newline at end of file
diff --git a/modules/package.json b/modules/package.json
new file mode 100644
index 000000000..ea02a338f
--- /dev/null
+++ b/modules/package.json
@@ -0,0 +1,5 @@
+{
+	"devDependencies": {
+		"esbuild": "0.14.14"
+	}
+}
diff --git a/modules/src/frame.ts b/modules/src/frame.ts
new file mode 100644
index 000000000..dba678957
--- /dev/null
+++ b/modules/src/frame.ts
@@ -0,0 +1,78 @@
+/*! 
/*! sheetjs (C) 2013-present SheetJS -- http://sheetjs.com */
import { Ptr, ProtoMessage, parse_shallow, parse_varint49, varint_to_i32 } from './proto';
import { u8concat } from './util';

/* ---- modules/src/frame.ts ---- */

/**
 * Walk the buffer as a sequence of records, each made of one type byte, a
 * 3-byte little-endian length and that many payload bytes.
 *
 * @returns true when the records end exactly at the end of the buffer
 */
function is_framed(buf: Uint8Array): boolean {
	var pos = 0;
	while(pos < buf.length) {
		/* byte 0 is the record type; bytes 1..3 hold the payload size */
		var size = buf[pos + 1] | (buf[pos + 2] << 8) | (buf[pos + 3] << 16);
		pos += 4 + size;
	}
	return pos == buf.length;
}
export { is_framed };

/**
 * Decode every record of a framed stream with `process_chunk` and join the
 * decoded payloads into one buffer.
 *
 * @throws Error when the records do not end exactly at the end of the buffer,
 *         or when `process_chunk` rejects a record
 */
function deframe(buf: Uint8Array): Uint8Array {
	var parts: Uint8Array[] = [];
	var pos = 0;
	while(pos < buf.length) {
		var kind = buf[pos];
		var size = buf[pos + 1] | (buf[pos + 2] << 8) | (buf[pos + 3] << 16);
		var start = pos + 4;
		parts.push(process_chunk(kind, buf.slice(start, start + size)));
		pos = start + size;
	}
	if(pos !== buf.length) throw new Error("data is not a valid framed stream!");
	return u8concat(parts);
}
export { deframe };

/**
 * Decompress the payload of a single record.
 *
 * The payload starts with a varint holding the expected output size, followed
 * by elements whose low two tag bits select a literal run (0) or a
 * back-reference copy with a 1-, 2- or 4-byte offset (1, 2, 3).
 *
 * @param type record type byte; only 0 is accepted
 * @param buf  record payload
 * @throws Error on an unexpected type, an invalid copy offset, or when the
 *         decoded size differs from the size announced in the header
 */
function process_chunk(type: number, buf: Uint8Array): Uint8Array {
	if(type != 0) throw new Error(`Unexpected Snappy chunk type ${type}`);
	var ptr: Ptr = [0];
	var usz = parse_varint49(buf, ptr);
	var pos = ptr[0];

	var chunks: Uint8Array[] = [];
	while(pos < buf.length) {
		var tag = buf[pos] & 0x3;

		if(tag == 0) {
			/* literal: lengths below 60 are stored inline, otherwise in 1-4 extra bytes */
			var len = buf[pos++] >> 2;
			if(len >= 60) {
				var extra = len - 59;
				len = buf[pos];
				if(extra > 1) len |= (buf[pos + 1] << 8);
				if(extra > 2) len |= (buf[pos + 2] << 16);
				if(extra > 3) len |= (buf[pos + 3] << 24);
				len >>>= 0;
				pos += extra;
			}
			++len;
			chunks.push(buf.slice(pos, pos + len));
			pos += len;
			continue;
		}

		/* copy: decode (length, offset) according to the tag */
		var offset = 0, length = 0;
		if(tag == 1) {
			var head = buf[pos++];
			length = ((head >> 2) & 0x7) + 4;
			offset = (head & 0xE0) << 3;
			offset |= buf[pos++];
		} else {
			length = (buf[pos++] >> 2) + 1;
			if(tag == 2) {
				offset = buf[pos] | (buf[pos + 1] << 8);
				pos += 2;
			} else {
				offset = (buf[pos] | (buf[pos + 1] << 8) | (buf[pos + 2] << 16) | (buf[pos + 3] << 24)) >>> 0;
				pos += 4;
			}
		}

		/* flatten everything decoded so far so the offset can index into it */
		var history = u8concat(chunks);
		chunks = [history];
		if(offset == 0) throw new Error("Invalid offset 0");
		if(offset > history.length) throw new Error("Invalid offset beyond length");

		if(length >= offset) {
			/* the copy overlaps its own output: repeat the last `offset` bytes */
			var period = history.slice(-offset);
			do {
				chunks.push(period);
				length -= offset;
			} while(length >= period.length);
		}
		chunks.push(history.slice(-offset, -offset + length));
	}

	var o = u8concat(chunks);
	if(o.length != usz) throw new Error(`Unexpected length: ${o.length} != ${usz}`);
	return o;
}

/* ---- modules/src/iwa.ts ---- */

interface IWAMessage {
	/** Metadata in .TSP.MessageInfo */
	meta: ProtoMessage;
	data: Uint8Array;
}
interface IWAArchiveInfo {
	id?: number;
	messages?: IWAMessage[];
}
export { IWAMessage, IWAArchiveInfo };

/**
 * Split a decompressed IWA buffer into archives.
 *
 * Each archive is a varint-length-prefixed .TSP.ArchiveInfo header followed by
 * the raw payloads of its messages; payload sizes come from field 3 of each
 * message-info entry (field 2 of the header).
 */
function parse_iwa(buf: Uint8Array): IWAArchiveInfo[] {
	var archives: IWAArchiveInfo[] = [], ptr: Ptr = [0];
	while(ptr[0] < buf.length) {
		/* .TSP.ArchiveInfo */
		var hdrlen = parse_varint49(buf, ptr);
		var info = parse_shallow(buf.slice(ptr[0], ptr[0] + hdrlen));
		ptr[0] += hdrlen;

		var archive: IWAArchiveInfo = {
			id: varint_to_i32(info[1][0].data),
			messages: []
		};
		var infos = info[2];
		for(var i = 0; i < infos.length; ++i) {
			var meta = parse_shallow(infos[i].data);
			var size = varint_to_i32(meta[3][0].data);
			archive.messages.push({
				meta: meta,
				data: buf.slice(ptr[0], ptr[0] + size)
			});
			ptr[0] += size;
		}
		archives.push(archive);
	}
	return archives;
}
export { parse_iwa };
/*! sheetjs (C) 2013-present SheetJS -- http://sheetjs.com */
/* ---- modules/src/numbers.ts ---- */
import { CFB$Container } from 'cfb';
import { WorkBook, WorkSheet, CellAddress, Range, CellObject } from '../../';
import { u8str, u8_to_dataview } from './util';
import { parse_shallow, varint_to_i32, parse_varint49, mappa } from './proto';
import { deframe } from './frame';
import { IWAArchiveInfo, IWAMessage, parse_iwa } from './iwa';

/* written here to avoid a full import of the 'xlsx' library */

/** Convert a 0-based column index to letters (0 -> "A", 26 -> "AA"). */
var encode_col = (C: number): string => {
	var s = "";
	for(++C; C; C = Math.floor((C - 1) / 26)) s = String.fromCharCode(((C - 1) % 26) + 65) + s;
	return s;
};
/** Convert a 0-based cell address to A1 notation. */
var encode_cell = (c: CellAddress): string => `${encode_col(c.c)}${c.r + 1}`;
/** Convert a range object to "A1:B2" notation. */
var encode_range = (r: Range): string => encode_cell(r.s) + ":" + encode_cell(r.e);
/** Create an empty workbook. */
var book_new = (): WorkBook => ({Sheets:{}, SheetNames:[]});
/** Append a worksheet; when no name is given the first unused `Sheet N` is chosen. */
var book_append_sheet = (wb: WorkBook, ws: WorkSheet, name?: string): void => {
	if(!name) for(var i = 1; i < 9999; ++i) if(wb.SheetNames.indexOf(name = `Sheet ${i}`) == -1) break;
	wb.SheetNames.push(name); wb.Sheets[name] = ws;
};

/**
 * Build a workbook from a container holding the `.iwa` entries of a NUMBERS file.
 *
 * Every entry whose name ends in `.iwa` is deframed and split into archives;
 * the messages of each archive are stored at the index given by the archive id.
 * Entries that fail to decode are logged and skipped (best effort).
 *
 * @throws Error when no message was collected, when there is not exactly one
 *         message of type 1 (document root), or when the root yields no sheets
 */
function parse_numbers(cfb: CFB$Container): WorkBook {
	var out: IWAMessage[][] = [];
	/* collect entire message space */
	cfb.FileIndex.forEach(s => {
		if(!s.name.match(/\.iwa$/)) return;
		var o: Uint8Array;
		try { o = deframe(s.content as Uint8Array); } catch(e) { return console.log("?? " + s.content.length + " " + (e.message || e)); }
		var packets: IWAArchiveInfo[];
		try { packets = parse_iwa(o); } catch(e) { return console.log("## " + (e.message || e)); }
		packets.forEach(packet => {out[+packet.id] = packet.messages;});
	});
	if(!out.length) throw new Error("File has no messages");
	/* find document root */
	var docroot: IWAMessage;
	out.forEach((iwams) => {
		iwams.forEach((iwam) => {
			var mtype = varint_to_i32(iwam.meta[1][0].data) >>> 0;
			if(mtype == 1) {
				if(!docroot) docroot = iwam;
				else throw new Error("Document has multiple roots");
			}
		});
	});
	if(!docroot) throw new Error("Cannot find Document root");
	return parse_docroot(out, docroot);
}
export default parse_numbers;

// .TSP.Reference
/** Read the identifier stored in field 1 of a reference message. */
function parse_Reference(buf: Uint8Array): number {
	var pb = parse_shallow(buf);
	return parse_varint49(pb[1][0].data);
}

// .TST.TableDataList
/** Collect the entries of field 3 into an array indexed by key (field 1) with string values (field 3). */
function parse_TST_TableDataList(M: IWAMessage[][], root: IWAMessage): string[] {
	var pb = parse_shallow(root.data);
	var entries = pb[3];
	var data = [];
	entries?.forEach(entry => {
		var le = parse_shallow(entry.data);
		var key = varint_to_i32(le[1][0].data)>>>0;
		data[key] = u8str(le[3][0].data);
	});
	return data;
}

interface TileRowInfo {
	R: number;
	cells?: Uint8Array[];
}
// .TSP.TileRowInfo
/**
 * Split the cell storage of one row (field 3) using the table of 16-bit
 * little-endian offsets in field 4. Slot `C` of the table gives the start of
 * column `C`; a start beyond the storage length marks the slot as skipped.
 * The end of a cell is the value of the following slot; the last slot has no
 * successor, so its cell runs to the end of the storage.
 */
function parse_TST_TileRowInfo(u8: Uint8Array): TileRowInfo {
	var pb = parse_shallow(u8);
	var R = varint_to_i32(pb[1][0].data) >>> 0;
	var storage = pb[3][0].data;
	var offsets = u8_to_dataview(pb[4][0].data);
	/* whole 16-bit slots only: a trailing odd byte cannot be read as a slot */
	var count = Math.floor(offsets.byteLength / 2);
	var cells: Uint8Array[] = [];
	for(var C = 0; C < count; ++C) {
		var off = offsets.getUint16(C*2, true);
		if(off > storage.length) continue;
		/* reading slot C+1 for the final column would run past the DataView and throw */
		var end = (C + 1 < count) ? offsets.getUint16(C*2+2, true) : storage.length;
		cells[C] = storage.subarray(off, end);
	}
	return { R, cells };
}

// .TST.Tile
/** Gather the row infos of field 5 into a `[row][col]` grid of raw cell buffers. */
function parse_TST_Tile(M: IWAMessage[][], root: IWAMessage): Uint8Array[][] {
	var pb = parse_shallow(root.data);
	var ri = mappa(pb[5], parse_TST_TileRowInfo);
	return ri.reduce((acc, x) => {
		if(!acc[x.R]) acc[x.R] = [];
		x.cells.forEach((cell, C) => {
			if(acc[x.R][C]) throw new Error(`Duplicate cell r=${x.R} c=${C}`);
			acc[x.R][C] = cell;
		});
		return acc;
	}, [] as Uint8Array[][]);
}

// .TST.TableModelArchive
/**
 * Fill `ws` from a table model: sets `!ref` from the row count (field 6) and
 * column count (field 7), then decodes every cell of every referenced tile.
 *
 * @throws Error on a non-positive row/column count, on a tile reference whose
 *         message type is not 6002, or on an unsupported cell type
 */
function parse_TST_TableModelArchive(M: IWAMessage[][], root: IWAMessage, ws: WorkSheet) {
	var pb = parse_shallow(root.data);
	var range: Range = { s: {r:0, c:0}, e: {r:0, c:0} };
	range.e.r = (varint_to_i32(pb[6][0].data) >>> 0) - 1;
	if(range.e.r < 0) throw new Error(`Invalid row varint ${pb[6][0].data}`);
	range.e.c = (varint_to_i32(pb[7][0].data) >>> 0) - 1;
	if(range.e.c < 0) throw new Error(`Invalid col varint ${pb[7][0].data}`);
	ws["!ref"] = encode_range(range);

	{
		// .TST.DataStore
		var store = parse_shallow(pb[4][0].data);

		/* string table used by string cells */
		var sst = parse_TST_TableDataList(M, M[parse_Reference(store[4][0].data)][0]);
		{
			// .TST.TileStorage
			var tile = parse_shallow(store[3][0].data);
			var tiles: Array<{id: number, ref: Uint8Array[][]}> = [];
			tile[1].forEach(t => {
				var tl = (parse_shallow(t.data));
				var ref = M[parse_Reference(tl[2][0].data)][0];
				var mtype = varint_to_i32(ref.meta[1][0].data);
				if(mtype != 6002) throw new Error(`6001 unexpected reference to ${mtype}`);
				tiles.push({id: varint_to_i32(tl[1][0].data), ref: parse_TST_Tile(M, ref) });
			});
			tiles.forEach((tile) => {
				tile.ref.forEach((row, R) => {
					row.forEach((buf, C) => {
						var dv = u8_to_dataview(buf);
						//var version = buf[0]; // numbers 3.x use "3", 6.x - 11.x use "4"
						/* TODO: find the correct field position of the data type and value. */
						var ctype = buf[2];
						var addr = encode_cell({r:R,c:C});
						switch(ctype) {
							case 0: { // TODO: generic ??
								switch(buf[1]) {
									case 3: ws[addr] = { t: "s", v: sst[dv.getUint32(buf.length - 4,true)] } as CellObject; break;
									case 2: ws[addr] = { t: "n", v: dv.getFloat64(16, true) } as CellObject; break;
									case 0: break; // ws[addr] = { t: "z" } as CellObject; // blank?
									case 5: break; // date-time
									case 7: break; // duration
									case 6: ws[addr] = { t: "b", v: dv.getFloat64(buf.length - 8, true) > 0 } as CellObject; break;
									default: throw new Error(`Unsupported cell type ${buf.slice(0,4)}`);
								}
							} break;
							case 3: { // string
								ws[addr] = { t: "s", v: sst[dv.getUint32(16,true)] } as CellObject;
							} break;
							case 2: { // number
								ws[addr] = { t: "n", v: dv.getFloat64(buf.length - 12, true) } as CellObject;
							} break;
							case 6: { // boolean
								ws[addr] = { t: "b", v: dv.getFloat64(16, true) > 0 } as CellObject; // 1 or 0
							} break;
							default: throw new Error(`Unsupported cell type ${ctype}`);
						}
					});
				});
			});
		}
	}
}

// .TST.TableInfoArchive
/** Follow the reference in field 2 to a table model (type 6001) and parse it into a new worksheet. */
function parse_TST_TableInfoArchive(M: IWAMessage[][], root: IWAMessage): WorkSheet {
	var pb = parse_shallow(root.data);
	var out: WorkSheet = { "!ref": "A1" };
	var tableref = M[parse_Reference(pb[2][0].data)];
	var mtype = varint_to_i32(tableref[0].meta[1][0].data);
	if(mtype != 6001) throw new Error(`6000 unexpected reference to ${mtype}`);
	parse_TST_TableModelArchive(M, tableref[0], out);
	return out;
}

// .TN.SheetArchive
/**
 * Parse a sheet: the name comes from field 1 (empty string when absent) and the
 * worksheet from the first referenced message of type 6000; later tables are ignored.
 */
function parse_sheetroot(M: IWAMessage[][], root: IWAMessage): [WorkSheet, string] {
	var pb = parse_shallow(root.data);
	var out: [WorkSheet, string] = [ { "!ref":"A1" }, (pb[1]?.[0] ? u8str(pb[1][0].data) : "") ];
	var shapeoffs = mappa(pb[2], parse_Reference);
	var seen = false;
	shapeoffs.forEach((off) => {
		M[off].forEach((m: IWAMessage) => {
			var mtype = varint_to_i32(m.meta[1][0].data);
			if(mtype == 6000) {
				if(seen) return; // TODO: multiple Tables in a sheet
				out[0] = parse_TST_TableInfoArchive(M, m);
				seen = true;
			}
		});
	});
	return out;
}

// .TN.DocumentArchive
/**
 * Parse the document root: every referenced message of type 2 becomes a sheet.
 *
 * @throws Error("Empty NUMBERS file") when no sheet was found
 */
function parse_docroot(M: IWAMessage[][], root: IWAMessage): WorkBook {
	var out = book_new();
	var pb = parse_shallow(root.data);
	var sheetoffs = mappa(pb[1], parse_Reference);
	sheetoffs.forEach((off) => {
		M[off].forEach((m: IWAMessage) => {
			var mtype = varint_to_i32(m.meta[1][0].data);
			if(mtype == 2) {
				var root = parse_sheetroot(M, m);
				book_append_sheet(out, root[0], root[1]);
			}
		});
	});
	if(out.SheetNames.length == 0) throw new Error("Empty NUMBERS file");
	return out;
}
/* ---- modules/src/proto.ts ---- */

/** One-element tuple used as a mutable read cursor that callee functions advance. */
export type Ptr = [number];

/**
 * Parse an integer from the varint that can be exactly stored in a double.
 *
 * At most seven bytes (7 bits each, 49 bits total) are consumed. Reading stops
 * after the first byte whose continuation bit (0x80) is clear, or at the end of
 * the buffer.
 *
 * @param buf bytes to read from
 * @param ptr optional cursor; reading starts at `ptr[0]` (0 when omitted) and
 *            `ptr[0]` is moved just past the last byte consumed
 * @returns the decoded non-negative integer
 */
export function parse_varint49(buf: Uint8Array, ptr?: Ptr): number {
	var l = ptr ? ptr[0] : 0;
	var usz = 0;
	for(var i = 0; i < 7; ++i) {
		var b = buf[l++];
		/* multiply instead of shifting: bit positions >= 31 do not fit a 32-bit shift */
		usz += (b & 0x7F) * Math.pow(2, 7 * i);
		/* the continuation flag lives in the byte that was just consumed */
		if(!(b >= 0x80)) break;
	}
	if(ptr) ptr[0] = l;
	return usz;
}

/**
 * Parse a 32-bit signed integer from the raw varint.
 *
 * Only the first five bytes are considered; the result is a signed 32-bit
 * value because it is assembled with bitwise operators.
 */
export function varint_to_i32(buf: Uint8Array): number {
	var i32 = 0;
	for(var i = 0; i < 5; ++i) {
		var b = buf[i];
		i32 |= (b & 0x7F) << (7 * i);
		if(!(b >= 0x80)) break;
	}
	return i32;
}

/** One occurrence of a field: where its key starts in the message and its raw value bytes. */
export interface ProtoItem {
	offset: number;
	data: Uint8Array;
}
/** Every occurrence of one field number, in order of appearance. */
export type ProtoField = Array<ProtoItem>;
/** Sparse array indexed by field number. */
export type ProtoMessage = Array<ProtoField>;

/**
 * Shallow parse of a message.
 *
 * Values are not interpreted: varint fields (wire type 0) keep their raw varint
 * bytes, fixed fields keep 8 (type 1) or 4 (type 5) bytes, and length-delimited
 * fields (type 2) keep their payload. Parsing stops at a key with field number 0.
 *
 * @throws Error for wire types 3 and 4 (groups) and any other unknown type;
 *         the message includes the offset of the offending key
 */
export function parse_shallow(buf: Uint8Array): ProtoMessage {
	var out: ProtoMessage = [], ptr: Ptr = [0];
	while(ptr[0] < buf.length) {
		var off = ptr[0];
		var num = parse_varint49(buf, ptr);
		var type = num & 0x07; num = Math.floor(num / 8);
		var len = 0;
		var res: Uint8Array;
		if(num == 0) break;
		switch(type) {
			case 0: {
				var l = ptr[0];
				while(buf[ptr[0]++] >= 0x80);
				res = buf.slice(l, ptr[0]);
			} break;
			case 5:
				len = 4;
				res = buf.slice(ptr[0], ptr[0] + len); ptr[0] += len; break;
			case 1:
				len = 8;
				res = buf.slice(ptr[0], ptr[0] + len); ptr[0] += len; break;
			case 2:
				len = parse_varint49(buf, ptr);
				res = buf.slice(ptr[0], ptr[0] + len); ptr[0] += len; break;
			case 3: // Start group
			case 4: // End group
			default: throw new Error(`PB Type ${type} for Field ${num} at offset ${off}`);
		}
		var v: ProtoItem = { offset: off, data: res };
		if(out[num] == null) out[num] = [v];
		else out[num].push(v);
	}
	return out;
}

/**
 * Apply `cb` to the raw bytes of every occurrence of a field.
 *
 * When `cb` throws an error whose message contains "at offset N", N is shifted
 * by the occurrence's own offset before the error is rethrown.
 *
 * @returns the mapped values, or an empty array when the field is absent
 */
export function mappa<U>(data: ProtoField, cb: (data: Uint8Array) => U): U[] {
	if(!data) return [];
	return data.map(function(d) {
		try {
			return cb(d.data);
		} catch(e) {
			if(e instanceof Error) {
				var m = e.message.match(/at offset (\d+)/);
				if(m) e.message = e.message.replace(/at offset (\d+)/, "at offset " + (+m[1] + d.offset));
			}
			throw e;
		}
	});
}

/* ---- modules/src/util.ts ---- */

/** Create a DataView over exactly the bytes covered by the typed array (no copy). */
var u8_to_dataview = (array: Uint8Array): DataView => new DataView(array.buffer, array.byteOffset, array.byteLength);
export { u8_to_dataview };

/** Decode bytes to a string with a default `TextDecoder`. */
var u8str = (u8: Uint8Array): string => /* Buffer.isBuffer(u8) ? u8.toString() :*/ new TextDecoder().decode(u8);
export { u8str };

/** Concatenate the arrays, in order, into one newly allocated array. */
var u8concat = (u8a: Uint8Array[]): Uint8Array => {
	var len = u8a.reduce((acc: number, x: Uint8Array) => acc + x.length, 0);
	var out = new Uint8Array(len);
	var off = 0;
	u8a.forEach(u8 => { out.set(u8, off); off += u8.length; });
	return out;
};
export { u8concat };

/** Prefix every non-empty line of `str` with `depth` copies of the indent string. */
var indent = (str: string, depth: number /* = 1 */): string => str.split(/\n/g).map(x => x && " ".repeat(depth) + x).join("\n");
export { indent };

/**
 * Find the first occurrence of `data` in `u8` at or after `byteOffset`.
 *
 * `data` may be a single byte value, a string (compared by UTF-16 code unit
 * against each byte) or a byte sequence.
 *
 * @returns index of the first match, or -1 when there is none
 */
function u8indexOf(u8: Uint8Array, data: string | number | Uint8Array, byteOffset?: number): number {
	//if(Buffer.isBuffer(u8)) return u8.indexOf(data, byteOffset);
	if(typeof data == "number") return u8.indexOf(data, byteOffset);
	var l = byteOffset;
	if(typeof data == "string") {
		outs: while((l = u8.indexOf(data.charCodeAt(0), l)) > -1) {
			/* l now points just past the candidate's first byte */
			++l;
			for(var j = 1; j < data.length; ++j) if(u8[l+j-1] != data.charCodeAt(j)) continue outs;
			return l - 1;
		}
	} else {
		outb: while((l = u8.indexOf(data[0], l)) > -1) {
			++l;
			for(var k = 1; k < data.length; ++k) if(u8[l+k-1] != data[k]) continue outb;
			return l - 1;
		}
	}
	return -1;
}
export { u8indexOf };
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright (C) 2012-present SheetJS LLC + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/packages/otorp/Makefile b/packages/otorp/Makefile new file mode 100644 index 000000000..109330556 --- /dev/null +++ b/packages/otorp/Makefile @@ -0,0 +1,20 @@ +LIBFILES=$(wildcard src/*.ts) +TSFILES=$(wildcard *.ts) +ENTRIES=$(subst .ts,.js,$(TSFILES)) + +.PHONY: all +all: $(ENTRIES) + +index.node.js: index.node.ts $(LIBFILES) + npx esbuild $< --bundle --outfile=$@ --platform=node --format=cjs + +%.node.js: %.node.ts $(LIBFILES) + npx esbuild $< --bundle --external:./ --outfile=$@ --platform=node + sed -i '' 's/ts-node/node/g' $@ + +%.js: %.ts $(LIBFILES) + npx esbuild $< --bundle --outfile=$@ --platform=browser --format=iife --global-name=$* --target=es5 + +.PHONY: clean +clean: + rm $(ENTRIES) diff --git a/packages/otorp/README.md b/packages/otorp/README.md new file mode 100644 index 000000000..932b9623f --- /dev/null +++ b/packages/otorp/README.md @@ -0,0 +1,31 @@ +# otorp + +Recover [Protocol Buffer](https://en.wikipedia.org/wiki/Protocol_Buffers) v2 +definitions from a Mach-O binary. + + +## Usage + +```bash +$ npx otorp /path/to/macho/binary # print all discovered defs to stdout +$ npx otorp /path/to/macho/binary out/ # write each discovered def to a file +``` + +This library and the embedded `otorp` CLI tool make the following assumptions: + +- In a serialized `FileDescriptorProto`, the `name` field appears first. + +- The name does not exceed 127 bytes in length. 
+ +- The name always ends in ".proto". + +- There is at least one simple reference to the start of the definition. + + +## License + +Please consult the attached LICENSE file for details. All rights not explicitly +granted by the Apache 2.0 license are reserved by the Original Author. + +[![Analytics](https://ga-beacon.appspot.com/UA-36810333-1/SheetJS/sheetjs?pixel)](https://github.com/SheetJS/sheetjs) + diff --git a/packages/otorp/dump_macho_proto_defs.node.js b/packages/otorp/dump_macho_proto_defs.node.js new file mode 100755 index 000000000..ad974765a --- /dev/null +++ b/packages/otorp/dump_macho_proto_defs.node.js @@ -0,0 +1,26 @@ +#!/usr/bin/env node + +// dump_macho_proto_defs.node.ts +var import_fs = require("fs"); +var import_path = require("path"); +var import__ = require("./"); +if (!process.argv[2] || process.argv[2] == "-h" || process.argv[2] == "--help") { + [ + "usage: otorp [output/folder]", + " if no output folder specified, log all discovered defs", + " if output folder specified, attempt to write defs in the folder" + ].map((x) => console.error(x)); + process.exit(1); +} +var buf = (0, import_fs.readFileSync)(process.argv[2]); +var otorps = (0, import__.otorp)(buf); +otorps.forEach(({ name, proto }) => { + if (!process.argv[3]) { + console.log(proto); + } else { + var pth = (0, import_path.resolve)(process.argv[3] || "./", name.replace(/[/]/g, "$")); + console.error(`writing ${name} to ${pth}`); + (0, import_fs.writeFileSync)(pth, proto); + } +}); +/*! sheetjs (C) 2013-present SheetJS -- http://sheetjs.com */ diff --git a/packages/otorp/dump_macho_proto_defs.node.ts b/packages/otorp/dump_macho_proto_defs.node.ts new file mode 100755 index 000000000..9d3974001 --- /dev/null +++ b/packages/otorp/dump_macho_proto_defs.node.ts @@ -0,0 +1,29 @@ +#!/usr/bin/env ts-node +/*! 
sheetjs (C) 2013-present SheetJS -- http://sheetjs.com */ +/* eslint-env node */ + +import { readFileSync, writeFileSync } from 'fs'; +import { resolve } from 'path'; +import { otorp } from './'; + +if(!process.argv[2] || process.argv[2] == "-h" || process.argv[2] == "--help") { + [ + "usage: otorp [output/folder]", + " if no output folder specified, log all discovered defs", + " if output folder specified, attempt to write defs in the folder" + ].map(x => console.error(x)); + process.exit(1); +} +var buf = readFileSync(process.argv[2]); + +var otorps = otorp(buf); + +otorps.forEach(({name, proto}) => { + if(!process.argv[3]) { + console.log(proto); + } else { + var pth = resolve(process.argv[3] || "./", name.replace(/[/]/g, "$")); + console.error(`writing ${name} to ${pth}`); + writeFileSync(pth, proto); + } +}); diff --git a/packages/otorp/index.node.js b/packages/otorp/index.node.js new file mode 100644 index 000000000..0cdff6fb3 --- /dev/null +++ b/packages/otorp/index.node.js @@ -0,0 +1,539 @@ +var __defProp = Object.defineProperty; +var __getOwnPropDesc = Object.getOwnPropertyDescriptor; +var __getOwnPropNames = Object.getOwnPropertyNames; +var __hasOwnProp = Object.prototype.hasOwnProperty; +var __markAsModule = (target) => __defProp(target, "__esModule", { value: true }); +var __export = (target, all) => { + for (var name in all) + __defProp(target, name, { get: all[name], enumerable: true }); +}; +var __reExport = (target, module2, copyDefault, desc) => { + if (module2 && typeof module2 === "object" || typeof module2 === "function") { + for (let key of __getOwnPropNames(module2)) + if (!__hasOwnProp.call(target, key) && (copyDefault || key !== "default")) + __defProp(target, key, { get: () => module2[key], enumerable: !(desc = __getOwnPropDesc(module2, key)) || desc.enumerable }); + } + return target; +}; +var __toCommonJS = /* @__PURE__ */ ((cache) => { + return (module2, temp) => { + return cache && cache.get(module2) || (temp = 
__reExport(__markAsModule({}), module2, 1), cache && cache.set(module2, temp), temp); + }; +})(typeof WeakMap !== "undefined" ? /* @__PURE__ */ new WeakMap() : 0); + +// index.node.ts +var index_node_exports = {}; +__export(index_node_exports, { + otorp: () => otorp_default +}); + +// ../../modules/src/util.ts +var u8_to_dataview = (array) => new DataView(array.buffer, array.byteOffset, array.byteLength); +var u8str = (u8) => new TextDecoder().decode(u8); +var indent = (str, depth) => str.split(/\n/g).map((x) => x && " ".repeat(depth) + x).join("\n"); +function u8indexOf(u8, data, byteOffset) { + if (typeof data == "number") + return u8.indexOf(data, byteOffset); + var l = byteOffset; + if (typeof data == "string") { + outs: + while ((l = u8.indexOf(data.charCodeAt(0), l)) > -1) { + ++l; + for (var j = 1; j < data.length; ++j) + if (u8[l + j - 1] != data.charCodeAt(j)) + continue outs; + return l - 1; + } + } else { + outb: + while ((l = u8.indexOf(data[0], l)) > -1) { + ++l; + for (var j = 1; j < data.length; ++j) + if (u8[l + j - 1] != data[j]) + continue outb; + return l - 1; + } + } + return -1; +} + +// src/macho.ts +var parse_fat = (buf) => { + var dv = u8_to_dataview(buf); + if (dv.getUint32(0, false) !== 3405691582) + throw new Error("Unsupported file"); + var nfat_arch = dv.getUint32(4, false); + var out = []; + for (var i = 0; i < nfat_arch; ++i) { + var start = i * 20 + 8; + var cputype = dv.getUint32(start, false); + var cpusubtype = dv.getUint32(start + 4, false); + var offset = dv.getUint32(start + 8, false); + var size = dv.getUint32(start + 12, false); + var align = dv.getUint32(start + 16, false); + out.push({ + type: cputype, + subtype: cpusubtype, + offset, + size, + align, + data: buf.slice(offset, offset + size) + }); + } + return out; +}; +var parse_macho = (buf) => { + var dv = u8_to_dataview(buf); + var magic = dv.getUint32(0, false); + switch (magic) { + case 3405691582: + return parse_fat(buf); + case 3489328638: + return [{ + type: 
dv.getUint32(4, false), + subtype: dv.getUint32(8, false), + offset: 0, + size: buf.length, + data: buf + }]; + } + throw new Error("Unsupported file"); +}; + +// ../../modules/src/proto.ts +function parse_varint49(buf, ptr) { + var l = ptr ? ptr[0] : 0; + var usz = buf[l] & 127; + varint: + if (buf[l++] >= 128) { + usz |= (buf[l] & 127) << 7; + if (buf[l++] < 128) + break varint; + usz |= (buf[l] & 127) << 14; + if (buf[l++] < 128) + break varint; + usz |= (buf[l] & 127) << 21; + if (buf[l++] < 128) + break varint; + usz += (buf[l] & 127) * Math.pow(2, 28); + ++l; + if (buf[l++] < 128) + break varint; + usz += (buf[l] & 127) * Math.pow(2, 35); + ++l; + if (buf[l++] < 128) + break varint; + usz += (buf[l] & 127) * Math.pow(2, 42); + ++l; + if (buf[l++] < 128) + break varint; + } + if (ptr) + ptr[0] = l; + return usz; +} +function varint_to_i32(buf) { + var l = 0, i32 = buf[l] & 127; + varint: + if (buf[l++] >= 128) { + i32 |= (buf[l] & 127) << 7; + if (buf[l++] < 128) + break varint; + i32 |= (buf[l] & 127) << 14; + if (buf[l++] < 128) + break varint; + i32 |= (buf[l] & 127) << 21; + if (buf[l++] < 128) + break varint; + i32 |= (buf[l] & 127) << 28; + } + return i32; +} +function parse_shallow(buf) { + var out = [], ptr = [0]; + while (ptr[0] < buf.length) { + var off = ptr[0]; + var num = parse_varint49(buf, ptr); + var type = num & 7; + num = Math.floor(num / 8); + var len = 0; + var res; + if (num == 0) + break; + switch (type) { + case 0: + { + var l = ptr[0]; + while (buf[ptr[0]++] >= 128) + ; + res = buf.slice(l, ptr[0]); + } + break; + case 5: + len = 4; + case 1: + if (!len) + len = 8; + case 2: + if (!len) + len = parse_varint49(buf, ptr); + res = buf.slice(ptr[0], ptr[0] + len); + ptr[0] += len; + break; + case 3: + case 4: + default: + throw new Error(`PB Type ${type} for Field ${num} at offset ${off}`); + } + var v = { offset: off, data: res }; + if (out[num] == null) + out[num] = [v]; + else + out[num].push(v); + } + return out; +} +function 
mappa(data, cb) { + if (!data) + return []; + return data.map((d) => { + try { + return cb(d.data); + } catch (e) { + var m = e.message?.match(/at offset (\d+)/); + if (m) + e.message = e.message.replace(/at offset (\d+)/, "at offset " + (+m[1] + d.offset)); + throw e; + } + }); +} + +// src/descriptor.ts +var TYPES = [ + "error", + "double", + "float", + "int64", + "uint64", + "int32", + "fixed64", + "fixed32", + "bool", + "string", + "group", + "message", + "bytes", + "uint32", + "enum", + "sfixed32", + "sfixed64", + "sint32", + "sint64" +]; +function parse_FileOptions(buf) { + var data = parse_shallow(buf); + var out = {}; + if (data[1]?.[0]) + out.javaPackage = u8str(data[1][0].data); + if (data[8]?.[0]) + out.javaOuterClassname = u8str(data[8][0].data); + if (data[11]?.[0]) + out.goPackage = u8str(data[11][0].data); + return out; +} +function parse_EnumValue(buf) { + var data = parse_shallow(buf); + var out = {}; + if (data[1]?.[0]) + out.name = u8str(data[1][0].data); + if (data[2]?.[0]) + out.number = varint_to_i32(data[2][0].data); + return out; +} +function parse_Enum(buf) { + var data = parse_shallow(buf); + var out = {}; + if (data[1]?.[0]) + out.name = u8str(data[1][0].data); + out.value = mappa(data[2], parse_EnumValue); + return out; +} +var write_Enum = (en) => { + var out = [`enum ${en.name} {`]; + en.value?.forEach(({ name, number }) => out.push(` ${name} = ${number};`)); + return out.concat(`}`).join("\n"); +}; +function parse_FieldOptions(buf) { + var data = parse_shallow(buf); + var out = {}; + if (data[2]?.[0]) + out.packed = !!data[2][0].data; + if (data[3]?.[0]) + out.deprecated = !!data[3][0].data; + return out; +} +function parse_Field(buf) { + var data = parse_shallow(buf); + var out = {}; + if (data[1]?.[0]) + out.name = u8str(data[1][0].data); + if (data[2]?.[0]) + out.extendee = u8str(data[2][0].data); + if (data[3]?.[0]) + out.number = varint_to_i32(data[3][0].data); + if (data[4]?.[0]) + out.label = varint_to_i32(data[4][0].data); + 
if (data[5]?.[0]) + out.type = varint_to_i32(data[5][0].data); + if (data[6]?.[0]) + out.typeName = u8str(data[6][0].data); + if (data[7]?.[0]) + out.defaultValue = u8str(data[7][0].data); + if (data[8]?.[0]) + out.options = parse_FieldOptions(data[8][0].data); + return out; +} +function write_Field(field) { + var out = []; + var label = ["", "optional ", "required ", "repeated "][field.label] || ""; + var type = field.typeName || TYPES[field.type] || "s5s"; + var opts = []; + if (field.defaultValue) + opts.push(`default = ${field.defaultValue}`); + if (field.options?.packed) + opts.push(`packed = true`); + if (field.options?.deprecated) + opts.push(`deprecated = true`); + var os = opts.length ? ` [${opts.join(", ")}]` : ""; + out.push(`${label}${type} ${field.name} = ${field.number}${os};`); + return out.length ? indent(out.join("\n"), 1) : ""; +} +function write_extensions(ext, xtra = false, coalesce = true) { + var res = []; + var xt = []; + ext.forEach((ext2) => { + if (!ext2.extendee) + return; + var row = coalesce ? xt.find((x) => x[0] == ext2.extendee) : xt[xt.length - 1]?.[0] == ext2.extendee ? xt[xt.length - 1] : null; + if (row) + row[1].push(ext2); + else + xt.push([ext2.extendee, [ext2]]); + }); + xt.forEach((extrow) => { + var out = [`extend ${extrow[0]} {`]; + extrow[1].forEach((ext2) => out.push(write_Field(ext2))); + res.push(out.concat(`}`).join("\n") + (xtra ? 
"\n" : "")); + }); + return res.join("\n"); +} +function parse_mtype(buf) { + var data = parse_shallow(buf); + var out = {}; + if (data[1]?.[0]) + out.name = u8str(data[1][0].data); + if (data[2]?.length >= 1) + out.field = mappa(data[2], parse_Field); + if (data[3]?.length >= 1) + out.nestedType = mappa(data[3], parse_mtype); + if (data[4]?.length >= 1) + out.enumType = mappa(data[4], parse_Enum); + if (data[6]?.length >= 1) + out.extension = mappa(data[6], parse_Field); + if (data[5]?.length >= 1) + out.extensionRange = data[5].map((d) => { + var data2 = parse_shallow(d.data); + var out2 = {}; + if (data2[1]?.[0]) + out2.start = varint_to_i32(data2[1][0].data); + if (data2[2]?.[0]) + out2.end = varint_to_i32(data2[2][0].data); + return out2; + }); + return out; +} +var write_mtype = (message) => { + var out = [`message ${message.name} {`]; + message.nestedType?.forEach((m) => out.push(indent(write_mtype(m), 1))); + message.enumType?.forEach((en) => out.push(indent(write_Enum(en), 1))); + message.field?.forEach((field) => out.push(write_Field(field))); + if (message.extensionRange) + message.extensionRange.forEach((er) => out.push(` extensions ${er.start} to ${er.end - 1};`)); + if (message.extension?.length) + out.push(indent(write_extensions(message.extension), 1)); + return out.concat(`}`).join("\n"); +}; +function parse_FileDescriptor(buf) { + var data = parse_shallow(buf); + var out = {}; + if (data[1]?.[0]) + out.name = u8str(data[1][0].data); + if (data[2]?.[0]) + out.package = u8str(data[2][0].data); + if (data[3]?.[0]) + out.dependency = data[3].map((x) => u8str(x.data)); + if (data[4]?.length >= 1) + out.messageType = mappa(data[4], parse_mtype); + if (data[5]?.length >= 1) + out.enumType = mappa(data[5], parse_Enum); + if (data[7]?.length >= 1) + out.extension = mappa(data[7], parse_Field); + if (data[8]?.[0]) + out.options = parse_FileOptions(data[8][0].data); + return out; +} +var write_FileDescriptor = (pb) => { + var out = [ + 'syntax = "proto2";', 
+ "" + ]; + if (pb.dependency) + pb.dependency.forEach((n) => { + if (n) + out.push(`import "${n}";`); + }); + if (pb.package) + out.push(`package ${pb.package}; +`); + if (pb.options) { + var o = out.length; + if (pb.options.javaPackage) + out.push(`option java_package = "${pb.options.javaPackage}";`); + if (pb.options.javaOuterClassname?.replace(/\W/g, "")) + out.push(`option java_outer_classname = "${pb.options.javaOuterClassname}";`); + if (pb.options.javaMultipleFiles) + out.push(`option java_multiple_files = true;`); + if (pb.options.goPackage) + out.push(`option go_package = "${pb.options.goPackage}";`); + if (out.length > o) + out.push(""); + } + pb.enumType?.forEach((en) => { + if (en.name) + out.push(write_Enum(en) + "\n"); + }); + pb.messageType?.forEach((m) => { + if (m.name) { + var o2 = write_mtype(m); + if (o2) + out.push(o2 + "\n"); + } + }); + if (pb.extension?.length) { + var e = write_extensions(pb.extension, true, false); + if (e) + out.push(e); + } + return out.join("\n") + "\n"; +}; + +// src/otorp.ts +function otorp(buf, builtins = false) { + var res = proto_offsets(buf); + var registry = {}; + var names = /* @__PURE__ */ new Set(); + var out = []; + res.forEach((r, i) => { + if (!builtins && r[1].startsWith("google/protobuf/")) + return; + var b = buf.slice(r[0], i < res.length - 1 ? res[i + 1][0] : buf.length); + var pb = parse_FileDescriptorProto(b); + names.add(r[1]); + registry[r[1]] = pb; + }); + names.forEach((name) => { + names.delete(name); + var pb = registry[name]; + var doit = (pb.dependency || []).every((d) => !names.has(d)); + if (!doit) { + names.add(name); + return; + } + var dups = res.filter((r) => r[1] == name); + if (dups.length == 1) + return out.push({ name, proto: write_FileDescriptor(pb) }); + var pbs = dups.map((r) => { + var i = res.indexOf(r); + var b = buf.slice(r[0], i < res.length - 1 ? 
res[i + 1][0] : buf.length); + var pb2 = parse_FileDescriptorProto(b); + return write_FileDescriptor(pb2); + }); + for (var l = 1; l < pbs.length; ++l) + if (pbs[l] != pbs[0]) + throw new Error(`Conflicting definitions for ${name} at offsets 0x${dups[0][0].toString(16)} and 0x${dups[l][0].toString(16)}`); + return out.push({ name, proto: pbs[0] }); + }); + return out; +} +var otorp_default = otorp; +var is_referenced = (buf, pos) => { + var dv = u8_to_dataview(buf); + try { + var headers = parse_macho(buf); + for (var i = 0; i < headers.length; ++i) { + if (pos < headers[i].offset || pos > headers[i].offset + headers[i].size) + continue; + var b = headers[i].data; + var p = pos - headers[i].offset; + var ref = new Uint8Array([0, 0, 0, 0, 0, 0, 0, 0]); + var dv = u8_to_dataview(ref); + dv.setInt32(0, p, true); + if (u8indexOf(b, ref, 0) > 0) + return true; + ref[4] = 1; + if (u8indexOf(b, ref, 0) > 0) + return true; + } + } catch (e) { + } + return false; +}; +var proto_offsets = (buf) => { + var meta = parse_macho(buf); + var out = []; + var off = 0; + search: + while ((off = u8indexOf(buf, ".proto", off + 1)) > -1) { + var pos = off; + off += 6; + while (off - pos < 256 && buf[pos] != off - pos - 1) { + if (buf[pos] > 127 || buf[pos] < 32) + continue search; + --pos; + } + if (off - pos > 250) + continue; + var name = u8str(buf.slice(pos + 1, off)); + if (buf[--pos] != 10) + continue; + if (!is_referenced(buf, pos)) { + continue; + } + var bin = meta.find((m) => m.offset <= pos && m.offset + m.size >= pos); + out.push([pos, name, bin?.type || -1, bin?.subtype || -1]); + } + return out; +}; +var parse_FileDescriptorProto = (buf) => { + var l = buf.length; + while (l > 0) + try { + var b = buf.slice(0, l); + var o = parse_FileDescriptor(b); + return o; + } catch (e) { + var m = e.message.match(/at offset (\d+)/); + if (m && parseInt(m[1], 10) < buf.length) + l = parseInt(m[1], 10) - 1; + else + --l; + } + throw new RangeError("no protobuf message in range"); +}; 
+module.exports = __toCommonJS(index_node_exports); +// Annotate the CommonJS export names for ESM import in node: +0 && (module.exports = { + otorp +}); +/*! otorp (C) 2013-present SheetJS -- http://sheetjs.com */ +/*! sheetjs (C) 2013-present SheetJS -- http://sheetjs.com */ diff --git a/packages/otorp/index.node.ts b/packages/otorp/index.node.ts new file mode 100644 index 000000000..135ddda6e --- /dev/null +++ b/packages/otorp/index.node.ts @@ -0,0 +1,4 @@ +/*! otorp (C) 2013-present SheetJS -- http://sheetjs.com */ + +import otorp from "./src/otorp"; +export { otorp }; \ No newline at end of file diff --git a/packages/otorp/package.json b/packages/otorp/package.json new file mode 100644 index 000000000..6a0edd5d9 --- /dev/null +++ b/packages/otorp/package.json @@ -0,0 +1,31 @@ +{ + "name": "otorp", + "version": "0.0.0", + "author": "sheetjs", + "description": "Recover protobuf definitions from Mach-O binaries", + "bin": { + "otorp": "dump_macho_proto_defs.node.js" + }, + "main": "index.node.js", + "dependencies": { + }, + "devDependencies": { + "esbuild": "0.14.14" + }, + "repository": { + "type": "git", + "url": "git://github.com/SheetJS/sheetjs.git" + }, + "scripts": { + "build": "make" + }, + "homepage": "https://sheetjs.com/", + "files": ["index.node.js", "dump_macho_proto_defs.node.js", "LICENSE", "README.md"], + "bugs": { + "url": "https://github.com/SheetJS/sheetjs/issues" + }, + "license": "Apache-2.0", + "engines": { + "node": ">=0.8" + } +} diff --git a/packages/otorp/src/descriptor.ts b/packages/otorp/src/descriptor.ts new file mode 100644 index 000000000..036d93f44 --- /dev/null +++ b/packages/otorp/src/descriptor.ts @@ -0,0 +1,238 @@ +/*! 
otorp (C) 2013-present SheetJS -- http://sheetjs.com */ + +import { parse_shallow, varint_to_i32, mappa } from "../../../modules/src/proto"; +import { u8str, indent } from "../../../modules/src/util"; + +var TYPES = [ + "error", + "double", + "float", + "int64", + "uint64", + "int32", + "fixed64", + "fixed32", + "bool", + "string", + "group", + "message", + "bytes", + "uint32", + "enum", + "sfixed32", + "sfixed64", + "sint32", + "sint64" +]; +export { TYPES }; + + +interface FileOptions { + javaPackage?: string; + javaOuterClassname?: string; + javaMultipleFiles?: string; + goPackage?: string; +} +function parse_FileOptions(buf: Uint8Array): FileOptions { + var data = parse_shallow(buf); + var out: FileOptions = {}; + if(data[1]?.[0]) out.javaPackage = u8str(data[1][0].data); + if(data[8]?.[0]) out.javaOuterClassname = u8str(data[8][0].data); + if(data[11]?.[0]) out.goPackage = u8str(data[11][0].data); + return out; +} + + +interface EnumValue { + name?: string; + number?: number; +} +function parse_EnumValue(buf: Uint8Array): EnumValue { + var data = parse_shallow(buf); + var out: EnumValue = {}; + if(data[1]?.[0]) out.name = u8str(data[1][0].data); + if(data[2]?.[0]) out.number = varint_to_i32(data[2][0].data); + return out; +} + + +interface Enum { + name?: string; + value?: EnumValue[]; +} +function parse_Enum(buf: Uint8Array): Enum { + var data = parse_shallow(buf); + var out: Enum = {}; + if(data[1]?.[0]) out.name = u8str(data[1][0].data); + out.value = mappa(data[2], parse_EnumValue); + return out; +} +var write_Enum = (en: Enum): string => { + var out = [`enum ${en.name} {`]; + en.value?.forEach(({name, number}) => out.push(` ${name} = ${number};`)); + return out.concat(`}`).join("\n"); +}; +export { Enum, parse_Enum, write_Enum }; + + +interface FieldOptions { + packed?: boolean; + deprecated?: boolean; +} +function parse_FieldOptions(buf: Uint8Array): FieldOptions { + var data = parse_shallow(buf); + var out: FieldOptions = {}; + if(data[2]?.[0]) 
out.packed = !!data[2][0].data; + if(data[3]?.[0]) out.deprecated = !!data[3][0].data; + return out; +} + + +interface Field { + name?: string; + extendee?: string; + number?: number; + label?: number; + type?: number; + typeName?: string; + defaultValue?: string; + options?: FieldOptions; +} +function parse_Field(buf: Uint8Array): Field { + var data = parse_shallow(buf); + var out: Field = {}; + if(data[1]?.[0]) out.name = u8str(data[1][0].data); + if(data[2]?.[0]) out.extendee = u8str(data[2][0].data); + if(data[3]?.[0]) out.number = varint_to_i32(data[3][0].data); + if(data[4]?.[0]) out.label = varint_to_i32(data[4][0].data); + if(data[5]?.[0]) out.type = varint_to_i32(data[5][0].data); + if(data[6]?.[0]) out.typeName = u8str(data[6][0].data); + if(data[7]?.[0]) out.defaultValue = u8str(data[7][0].data); + if(data[8]?.[0]) out.options = parse_FieldOptions(data[8][0].data); + return out; +} +function write_Field(field: Field): string { + var out = []; + var label = ["", "optional ", "required ", "repeated "][field.label] || ""; + var type = field.typeName || TYPES[field.type] || "s5s"; + var opts = []; + if(field.defaultValue) opts.push(`default = ${field.defaultValue}`); + if(field.options?.packed) opts.push(`packed = true`); + if(field.options?.deprecated) opts.push(`deprecated = true`); + var os = opts.length ? ` [${opts.join(", ")}]`: ""; + out.push(`${label}${type} ${field.name} = ${field.number}${os};`); + return out.length ? indent(out.join("\n"), 1) : ""; +} +export { Field, parse_Field, write_Field }; + + +function write_extensions(ext: Field[], xtra = false, coalesce = true): string { + var res = []; + var xt: Array<[string, Array]> = []; + ext.forEach(ext => { + if(!ext.extendee) return; + var row = coalesce ? + xt.find(x => x[0] == ext.extendee) : + (xt[xt.length - 1]?.[0] == ext.extendee ? 
xt[xt.length - 1]: null); + if(row) row[1].push(ext); + else xt.push([ext.extendee, [ext]]); + }); + xt.forEach(extrow => { + var out = [`extend ${extrow[0]} {`]; + extrow[1].forEach(ext => out.push(write_Field(ext))); + res.push(out.concat(`}`).join("\n") + (xtra ? "\n" : "")); + }); + return res.join("\n"); +} +export { write_extensions }; + + +interface ExtensionRange { start?: number; end?: number; } +interface MessageType { + name?: string; + nestedType?: MessageType[]; + enumType?: Enum[]; + field?: Field[]; + extension?: Field[]; + extensionRange?: ExtensionRange[]; +} +function parse_mtype(buf: Uint8Array): MessageType { + var data = parse_shallow(buf); + var out: MessageType = {}; + if(data[1]?.[0]) out.name = u8str(data[1][0].data); + if(data[2]?.length >= 1) out.field = mappa(data[2], parse_Field); + if(data[3]?.length >= 1) out.nestedType = mappa(data[3], parse_mtype); + if(data[4]?.length >= 1) out.enumType = mappa(data[4], parse_Enum); + if(data[6]?.length >= 1) out.extension = mappa(data[6], parse_Field); + if(data[5]?.length >= 1) out.extensionRange = data[5].map(d => { + var data = parse_shallow(d.data); + var out: ExtensionRange = {}; + if(data[1]?.[0]) out.start = varint_to_i32(data[1][0].data); + if(data[2]?.[0]) out.end = varint_to_i32(data[2][0].data); + return out; + }); + return out; +} +var write_mtype = (message: MessageType): string => { + var out = [ `message ${message.name} {` ]; + message.nestedType?.forEach(m => out.push(indent(write_mtype(m), 1))); + message.enumType?.forEach(en => out.push(indent(write_Enum(en), 1))); + message.field?.forEach(field => out.push(write_Field(field))); + if(message.extensionRange) message.extensionRange.forEach(er => out.push(` extensions ${er.start} to ${er.end - 1};`)); + if(message.extension?.length) out.push(indent(write_extensions(message.extension), 1)); + return out.concat(`}`).join("\n"); +}; + + +interface Descriptor { + name?: string; + package?: string; + dependency?: string[]; + 
messageType?: MessageType[]; + enumType?: Enum[]; + extension?: Field[]; + options?: FileOptions; +} +function parse_FileDescriptor(buf: Uint8Array): Descriptor { + var data = parse_shallow(buf); + var out: Descriptor = {}; + if(data[1]?.[0]) out.name = u8str(data[1][0].data); + if(data[2]?.[0]) out.package = u8str(data[2][0].data); + if(data[3]?.[0]) out.dependency = data[3].map(x => u8str(x.data)); + + if(data[4]?.length >= 1) out.messageType = mappa(data[4], parse_mtype); + if(data[5]?.length >= 1) out.enumType = mappa(data[5], parse_Enum); + if(data[7]?.length >= 1) out.extension = mappa(data[7], parse_Field); + + if(data[8]?.[0]) out.options = parse_FileOptions(data[8][0].data); + + return out; +} +var write_FileDescriptor = (pb: Descriptor): string => { + var out = [ + 'syntax = "proto2";', + '' + ]; + if(pb.dependency) pb.dependency.forEach((n: string) => { if(n) out.push(`import "${n}";`); }); + if(pb.package) out.push(`package ${pb.package};\n`); + if(pb.options) { + var o = out.length; + + if(pb.options.javaPackage) out.push(`option java_package = "${pb.options.javaPackage}";`); + if(pb.options.javaOuterClassname?.replace(/\W/g, "")) out.push(`option java_outer_classname = "${pb.options.javaOuterClassname}";`); + if(pb.options.javaMultipleFiles) out.push(`option java_multiple_files = true;`); + if(pb.options.goPackage) out.push(`option go_package = "${pb.options.goPackage}";`); + + if(out.length > o) out.push(''); + } + + pb.enumType?.forEach(en => { if(en.name) out.push(write_Enum(en) + "\n"); }); + pb.messageType?.forEach(m => { if(m.name) { var o = write_mtype(m); if(o) out.push(o + "\n"); }}); + + if(pb.extension?.length) { + var e = write_extensions(pb.extension, true, false); + if(e) out.push(e); + } + return out.join("\n") + "\n"; +}; +export { Descriptor, parse_FileDescriptor, write_FileDescriptor }; \ No newline at end of file diff --git a/packages/otorp/src/macho.ts b/packages/otorp/src/macho.ts new file mode 100644 index 000000000..a93296265 
--- /dev/null +++ b/packages/otorp/src/macho.ts @@ -0,0 +1,55 @@ +/*! otorp (C) 2013-present SheetJS -- http://sheetjs.com */ + +import { u8_to_dataview } from "../../../modules/src/util"; + +interface MachOEntry { + type: number; + subtype: number; + offset: number; + size: number; + align?: number; + data: Uint8Array; +} +var parse_fat = (buf: Uint8Array): MachOEntry[] => { + var dv = u8_to_dataview(buf); + if(dv.getUint32(0, false) !== 0xCAFEBABE) throw new Error("Unsupported file"); + var nfat_arch = dv.getUint32(4, false); + var out: MachOEntry[] = []; + for(var i = 0; i < nfat_arch; ++i) { + var start = i * 20 + 8; + + var cputype = dv.getUint32(start, false); + var cpusubtype = dv.getUint32(start+4, false); + var offset = dv.getUint32(start+8, false); + var size = dv.getUint32(start+12, false); + var align = dv.getUint32(start+16, false); + + out.push({ + type: cputype, + subtype: cpusubtype, + offset, + size, + align, + data: buf.slice(offset, offset + size) + }); + } + return out; +}; +var parse_macho = (buf: Uint8Array): MachOEntry[] => { + var dv = u8_to_dataview(buf); + var magic = dv.getUint32(0, false); + switch(magic) { + // fat binary (x86_64 / aarch64) + case 0xCAFEBABE: return parse_fat(buf); + // x86_64 + case 0xCFFAEDFE: return [{ + type: dv.getUint32(4, false), + subtype: dv.getUint32(8, false), + offset: 0, + size: buf.length, + data: buf + }]; + } + throw new Error("Unsupported file"); +}; +export { MachOEntry, parse_macho }; \ No newline at end of file diff --git a/packages/otorp/src/otorp.ts b/packages/otorp/src/otorp.ts new file mode 100644 index 000000000..3c773e4ba --- /dev/null +++ b/packages/otorp/src/otorp.ts @@ -0,0 +1,113 @@ +/*! 
otorp (C) 2013-present SheetJS -- http://sheetjs.com */ + +import { u8indexOf, u8str, u8_to_dataview } from "../../../modules/src/util"; +import { parse_macho } from "./macho"; +import { Descriptor, parse_FileDescriptor, write_FileDescriptor } from './descriptor'; + + +interface OtorpEntry { + name: string; + proto: string; +} +export { OtorpEntry }; + +/** Find and stringify all relevant protobuf defs */ +function otorp(buf: Uint8Array, builtins = false): OtorpEntry[] { + var res = proto_offsets(buf); + var registry: {[key: string]: Descriptor} = {}; + var names: Set = new Set(); + var out: OtorpEntry[] = []; + + res.forEach((r, i) => { + if(!builtins && r[1].startsWith("google/protobuf/")) return; + var b = buf.slice(r[0], i < res.length - 1 ? res[i+1][0] : buf.length); + var pb = parse_FileDescriptorProto(b/*, r[1]*/); + names.add(r[1]); + registry[r[1]] = pb; + }); + + names.forEach(name => { + /* ensure partial ordering by dependencies */ + names.delete(name); + var pb = registry[name]; + var doit = (pb.dependency||[]).every((d: string) => !names.has(d)); + if(!doit) { names.add(name); return; } + + var dups = res.filter(r => r[1] == name); + if(dups.length == 1) return out.push({ name, proto: write_FileDescriptor(pb) }); + + /* in a fat binary, compare the defs for x86_64/aarch64 */ + var pbs = dups.map(r => { + var i = res.indexOf(r); + var b = buf.slice(r[0], i < res.length - 1 ? 
res[i+1][0] : buf.length); + var pb = parse_FileDescriptorProto(b/*, r[1]*/); + return write_FileDescriptor(pb); + }); + for(var l = 1; l < pbs.length; ++l) if(pbs[l] != pbs[0]) throw new Error(`Conflicting definitions for ${name} at offsets 0x${dups[0][0].toString(16)} and 0x${dups[l][0].toString(16)}`); + return out.push({ name, proto: pbs[0] }); + }); + + return out; +} +export default otorp; + +/** Determine if an address is being referenced */ +var is_referenced = (buf: Uint8Array, pos: number): boolean => { + var dv = u8_to_dataview(buf); + + try { + var headers = parse_macho(buf); + for(var i = 0; i < headers.length; ++i) { + if(pos < headers[i].offset || pos > headers[i].offset + headers[i].size) continue; + var b = headers[i].data; + var p = pos - headers[i].offset; + var ref = new Uint8Array([0,0,0,0,0,0,0,0]); + var dv = u8_to_dataview(ref); + dv.setInt32(0, p, true); + if(u8indexOf(b, ref, 0) > 0) return true; + ref[4] = 0x01; + if(u8indexOf(b, ref, 0) > 0) return true; + } + } catch(e) {} + + return false; +}; + +type OffsetList = Array<[number, string, number, number]>; +/** Generate a list of potential starting points */ +var proto_offsets = (buf: Uint8Array): OffsetList => { + var meta = parse_macho(buf); + var out: OffsetList = []; + var off = 0; + /* note: this loop only works for names < 128 chars */ + search: while((off = u8indexOf(buf, ".proto", off + 1)) > -1) { + var pos = off; + off += 6; + while(off - pos < 256 && buf[pos] != off - pos - 1) { + if(buf[pos] > 0x7F || buf[pos] < 0x20) continue search; + --pos; + } + if(off - pos > 250) continue; + var name = u8str(buf.slice(pos + 1, off)); + if(buf[--pos] != 0x0A) continue; + if(!is_referenced(buf, pos)) { /* console.error(`Reference to ${name} not found`); */ continue; } + var bin = meta.find(m => m.offset <= pos && m.offset + m.size >= pos); + out.push([pos, name, bin?.type || -1, bin?.subtype || -1]); + } + return out; +}; + +/** Parse a descriptor that starts with the first byte of the 
supplied buffer */ +var parse_FileDescriptorProto = (buf: Uint8Array): Descriptor => { + var l = buf.length; + while(l > 0) try { + var b = buf.slice(0,l); + var o = parse_FileDescriptor(b); + return o; + } catch(e) { + var m = e.message.match(/at offset (\d+)/); + if(m && parseInt(m[1], 10) < buf.length) l = parseInt(m[1], 10) - 1; + else --l; + } + throw new RangeError("no protobuf message in range"); +};