From eafc07926bba158f679e660236f017455b32e761 Mon Sep 17 00:00:00 2001 From: ThomasChan Date: Thu, 19 Sep 2019 20:23:39 +0800 Subject: [PATCH 1/3] improve parse_dom_table merge cell logic performance --- xlsx.flow.js | 34 +++++++++++++++++++++------------- xlsx.js | 34 +++++++++++++++++++++------------- xlsx.mini.flow.js | 34 +++++++++++++++++++++------------- xlsx.mini.js | 34 +++++++++++++++++++++------------- 4 files changed, 84 insertions(+), 52 deletions(-) diff --git a/xlsx.flow.js b/xlsx.flow.js index f8bbb790b..18e3d815d 100644 --- a/xlsx.flow.js +++ b/xlsx.flow.js @@ -19160,29 +19160,37 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { var rows/*:HTMLCollection*/ = table.getElementsByTagName('tr'); var sheetRows = opts.sheetRows || 10000000; var range/*:Range*/ = {s:{r:0,c:0},e:{r:0,c:0}}; - var merges/*:Array*/ = [], midx = 0; + var merges/*:Array*/ = [], midx = 0, m, cInRange/*:Array*/ = [], cache/*:Object*/ = {}; var rowinfo/*:Array*/ = []; - var _R = 0, R = 0, _C, C, RS, CS; + var _R = 0, R = 0, _C, C, RS, CS, row, elts, elt, h, v, o, _t; for(; _R < rows.length && R < sheetRows; ++_R) { - var row/*:HTMLTableRowElement*/ = rows[_R]; + row/*:HTMLTableRowElement*/ = rows[_R]; if (is_dom_element_hidden(row)) { if (opts.display) continue; rowinfo[R] = {hidden: true}; } - var elts/*:HTMLCollection*/ = (row.children/*:any*/); + elts/*:HTMLCollection*/ = (row.children/*:any*/); for(_C = C = 0; _C < elts.length; ++_C) { - var elt/*:HTMLTableCellElement*/ = elts[_C]; + elt/*:HTMLTableCellElement*/ = elts[_C]; if (opts.display && is_dom_element_hidden(elt)) continue; - var v/*:string*/ = htmldecode(elt.innerHTML); - for(midx = 0; midx < merges.length; ++midx) { - var m/*:Range*/ = merges[midx]; - if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; } + h/*:string*/ = elt.innerHTML; + v/*:string*/ = cache[h] || (cache[h] = htmldecode(elt.innerHTML)); + cInRange.length = 0; + midx/*:number*/ = merges.length; + while (midx--) { + m/*:Range*/ = merges[midx]; + if(m.s.r <= R && R <= m.e.r) { + cInRange.push(m.e.c); + } + } + if (cInRange.indexOf(C) !== -1) { + C = Math.max.apply(null, cInRange) + 1; } /* TODO: figure out how to extract nonstandard mso- style */ - CS = +elt.getAttribute("colspan") || 1; - if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); - var o/*:Cell*/ = {t:'s', v:v}; - var _t/*:string*/ = elt.getAttribute("t") || ""; + CS = +elt.colSpan || 1; + if((RS = +elt.rowSpan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); + o/*:Cell*/ = {t:'s', v:v}; + _t/*:string*/ = elt.getAttribute("t") || ""; if(v != null) { if(v.length == 0) o.t = _t || 'z'; else if(opts.raw || v.trim().length == 0 || _t == "s"){} diff --git a/xlsx.js b/xlsx.js index 5c4744e0c..004bf20ac 100644 --- a/xlsx.js +++ b/xlsx.js @@ -19044,29 +19044,37 @@ function parse_dom_table(table, _opts) { var rows = table.getElementsByTagName('tr'); var sheetRows = opts.sheetRows || 10000000; var range = {s:{r:0,c:0},e:{r:0,c:0}}; - var merges = [], midx = 0; + var merges = [], midx = 0, m, cInRange = [], cache = {}; var rowinfo = []; - var _R = 0, R = 0, _C, C, RS, CS; + var _R = 0, R = 0, _C, C, RS, CS, row, elts, elt, h, v, o, _t; for(; _R < rows.length && R < sheetRows; ++_R) { - var row = rows[_R]; + row = rows[_R]; if (is_dom_element_hidden(row)) { if (opts.display) continue; rowinfo[R] = {hidden: true}; } - var elts = (row.children); + elts = (row.children); for(_C = C = 0; _C < elts.length; ++_C) { - var elt = elts[_C]; + elt = elts[_C]; if (opts.display && is_dom_element_hidden(elt)) continue; - var v = htmldecode(elt.innerHTML); - for(midx = 0; midx < merges.length; ++midx) { - var m = merges[midx]; - if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; } + h = elt.innerHTML; + v = h || (cache[h] = htmldecode(elt.innerHTML)); + cInRange.length = 0; + midx = merges.length; + while (midx--) { + m = merges[midx]; + if(m.s.r <= R && R <= m.e.r) { + cInRange.push(m.e.c); + } + } + if (cInRange.indexOf(C) !== -1) { + C = Math.max.apply(null, cInRange) + 1; } /* TODO: figure out how to extract nonstandard mso- style */ - CS = +elt.getAttribute("colspan") || 1; - if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); - var o = {t:'s', v:v}; - var _t = elt.getAttribute("t") || ""; + CS = +elt.colSpan || 1; + if((RS = +elt.rowSpan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); + o = {t:'s', v:v}; + _t = elt.getAttribute("t") || ""; if(v != null) { if(v.length == 0) o.t = _t || 'z'; else if(opts.raw || v.trim().length == 0 || _t == "s"){} diff --git a/xlsx.mini.flow.js b/xlsx.mini.flow.js index 9fe0d0a92..3fb78c660 100644 --- a/xlsx.mini.flow.js +++ b/xlsx.mini.flow.js @@ -7379,29 +7379,37 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { var rows/*:HTMLCollection*/ = table.getElementsByTagName('tr'); var sheetRows = opts.sheetRows || 10000000; var range/*:Range*/ = {s:{r:0,c:0},e:{r:0,c:0}}; - var merges/*:Array*/ = [], midx = 0; + var merges/*:Array*/ = [], midx = 0, m, cInRange/*:Array*/ = [], cache/*:Object*/ = {}; var rowinfo/*:Array*/ = []; - var _R = 0, R = 0, _C, C, RS, CS; + var _R = 0, R = 0, _C, C, RS, CS, row, elts, elt, h, v, o, _t; for(; _R < rows.length && R < sheetRows; ++_R) { - var row/*:HTMLTableRowElement*/ = rows[_R]; + row/*:HTMLTableRowElement*/ = rows[_R]; if (is_dom_element_hidden(row)) { if (opts.display) continue; rowinfo[R] = {hidden: true}; } - var elts/*:HTMLCollection*/ = (row.children/*:any*/); + elts/*:HTMLCollection*/ = (row.children/*:any*/); for(_C = C = 0; _C < elts.length; ++_C) { - var elt/*:HTMLTableCellElement*/ = elts[_C]; + elt/*:HTMLTableCellElement*/ = elts[_C]; if (opts.display && is_dom_element_hidden(elt)) continue; - var v/*:string*/ = htmldecode(elt.innerHTML); - for(midx = 0; midx < merges.length; ++midx) { - var m/*:Range*/ = merges[midx]; - if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; } + h/*:string*/ = elt.innerHTML; + v/*:string*/ = cache[h] || (cache[h] = htmldecode(elt.innerHTML)); + cInRange.length = 0; + midx/*:number*/ = merges.length; + while (midx--) { + m/*:Range*/ = merges[midx]; + if(m.s.r <= R && R <= m.e.r) { + cInRange.push(m.e.c); + } + } + if (cInRange.indexOf(C) !== -1) { + C = Math.max.apply(null, cInRange) + 1; } /* TODO: figure out how to extract nonstandard mso- style */ - CS = +elt.getAttribute("colspan") || 1; - if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); - var o/*:Cell*/ = {t:'s', v:v}; - var _t/*:string*/ = elt.getAttribute("t") || ""; + CS = +elt.colSpan || 1; + if((RS = +elt.rowSpan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); + o/*:Cell*/ = {t:'s', v:v}; + _t/*:string*/ = elt.getAttribute("t") || ""; if(v != null) { if(v.length == 0) o.t = _t || 'z'; else if(opts.raw || v.trim().length == 0 || _t == "s"){} diff --git a/xlsx.mini.js b/xlsx.mini.js index 021ffc264..18e06174d 100644 --- a/xlsx.mini.js +++ b/xlsx.mini.js @@ -7288,29 +7288,37 @@ function parse_dom_table(table, _opts) { var rows = table.getElementsByTagName('tr'); var sheetRows = opts.sheetRows || 10000000; var range = {s:{r:0,c:0},e:{r:0,c:0}}; - var merges = [], midx = 0; + var merges = [], midx = 0, m, cInRange = [], cache = {}; var rowinfo = []; - var _R = 0, R = 0, _C, C, RS, CS; + var _R = 0, R = 0, _C, C, RS, CS, row, elts, elt, h, v, o, _t; for(; _R < rows.length && R < sheetRows; ++_R) { - var row = rows[_R]; + row = rows[_R]; if (is_dom_element_hidden(row)) { if (opts.display) continue; rowinfo[R] = {hidden: true}; } - var elts = (row.children); + elts = (row.children); for(_C = C = 0; _C < elts.length; ++_C) { - var elt = elts[_C]; + elt = elts[_C]; if (opts.display && is_dom_element_hidden(elt)) continue; - var v = htmldecode(elt.innerHTML); - for(midx = 0; midx < merges.length; ++midx) { - var m = merges[midx]; - if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; } + h = elt.innerHTML; + v = h || (cache[h] = htmldecode(elt.innerHTML)); + cInRange.length = 0; + midx = merges.length; + while (midx--) { + m = merges[midx]; + if(m.s.r <= R && R <= m.e.r) { + cInRange.push(m.e.c); + } + } + if (cInRange.indexOf(C) !== -1) { + C = Math.max.apply(null, cInRange) + 1; } /* TODO: figure out how to extract nonstandard mso- style */ - CS = +elt.getAttribute("colspan") || 1; - if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); - var o = {t:'s', v:v}; - var _t = elt.getAttribute("t") || ""; + CS = +elt.colSpan || 1; + if((RS = +elt.rowSpan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); + o = {t:'s', v:v}; + _t = elt.getAttribute("t") || ""; if(v != null) { if(v.length == 0) o.t = _t || 'z'; else if(opts.raw || v.trim().length == 0 || _t == "s"){} From 760c55d533b05f60c3fa3290e8b468f00f974a45 Mon Sep 17 00:00:00 2001 From: ThomasChan Date: Thu, 19 Sep 2019 20:26:56 +0800 Subject: [PATCH 2/3] improve parse_dom_table merge cell logic performance --- xlsx.flow.js | 2 +- xlsx.js | 2 +- xlsx.mini.flow.js | 2 +- xlsx.mini.js | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/xlsx.flow.js b/xlsx.flow.js index 18e3d815d..5dd0e1c12 100644 --- a/xlsx.flow.js +++ b/xlsx.flow.js @@ -19188,7 +19188,7 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { } /* TODO: figure out how to extract nonstandard mso- style */ CS = +elt.colSpan || 1; - if((RS = +elt.rowSpan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); + if((RS = +elt.rowSpan)>1 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); o/*:Cell*/ = {t:'s', v:v}; _t/*:string*/ = elt.getAttribute("t") || ""; if(v != null) { diff --git a/xlsx.js b/xlsx.js index 004bf20ac..0344342e5 100644 --- a/xlsx.js +++ b/xlsx.js @@ -19072,7 +19072,7 @@ function parse_dom_table(table, _opts) { } /* TODO: figure out how to extract nonstandard mso- style */ CS = +elt.colSpan || 1; - if((RS = +elt.rowSpan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); + if((RS = +elt.rowSpan)>1 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); o = {t:'s', v:v}; _t = elt.getAttribute("t") || ""; if(v != null) { diff --git a/xlsx.mini.flow.js b/xlsx.mini.flow.js index 3fb78c660..b8f9bad2c 100644 --- a/xlsx.mini.flow.js +++ b/xlsx.mini.flow.js @@ -7407,7 +7407,7 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { } /* TODO: figure out how to extract nonstandard mso- style */ CS = +elt.colSpan || 1; - if((RS = +elt.rowSpan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); + if((RS = +elt.rowSpan)>1 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); o/*:Cell*/ = {t:'s', v:v}; _t/*:string*/ = elt.getAttribute("t") || ""; if(v != null) { diff --git a/xlsx.mini.js b/xlsx.mini.js index 18e06174d..53892442a 100644 --- a/xlsx.mini.js +++ b/xlsx.mini.js @@ -7316,7 +7316,7 @@ function parse_dom_table(table, _opts) { } /* TODO: figure out how to extract nonstandard mso- style */ CS = +elt.colSpan || 1; - if((RS = +elt.rowSpan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); + if((RS = +elt.rowSpan)>1 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); o = {t:'s', v:v}; _t = elt.getAttribute("t") || ""; if(v != null) { From aaefb5344e5407d7d2bbd5eea663a644ad995eb6 Mon Sep 17 00:00:00 2001 From: ThomasChan Date: Wed, 25 Sep 2019 15:13:40 +0800 Subject: [PATCH 3/3] fix xlsx merge cells logic --- xlsx.flow.js | 7 ++++++- xlsx.js | 7 ++++++- xlsx.mini.flow.js | 7 ++++++- xlsx.mini.js | 7 ++++++- 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/xlsx.flow.js b/xlsx.flow.js index 5dd0e1c12..69bc77a8a 100644 --- a/xlsx.flow.js +++ b/xlsx.flow.js @@ -19184,7 +19184,12 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { } } if (cInRange.indexOf(C) !== -1) { - C = Math.max.apply(null, cInRange) + 1; + for (let i = 0; i < cInRange.length; i++) { + C += 1; + if (cInRange.indexOf(C) === -1) { + break; + } + } } /* TODO: figure out how to extract nonstandard mso- style */ CS = +elt.colSpan || 1; diff --git a/xlsx.js b/xlsx.js index 0344342e5..08f1345ae 100644 --- a/xlsx.js +++ b/xlsx.js @@ -19068,7 +19068,12 @@ function parse_dom_table(table, _opts) { } } if (cInRange.indexOf(C) !== -1) { - C = Math.max.apply(null, cInRange) + 1; + for (let i = 0; i < cInRange.length; i++) { + C += 1; + if (cInRange.indexOf(C) === -1) { + break; + } + } } /* TODO: figure out how to extract nonstandard mso- style */ CS = +elt.colSpan || 1; diff --git a/xlsx.mini.flow.js b/xlsx.mini.flow.js index b8f9bad2c..c836af719 100644 --- a/xlsx.mini.flow.js +++ b/xlsx.mini.flow.js @@ -7403,7 +7403,12 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { } } if (cInRange.indexOf(C) !== -1) { - C = Math.max.apply(null, cInRange) + 1; + for (let i = 0; i < cInRange.length; i++) { + C += 1; + if (cInRange.indexOf(C) === -1) { + break; + } + } } /* TODO: figure out how to extract nonstandard mso- style */ CS = +elt.colSpan || 1; diff --git a/xlsx.mini.js b/xlsx.mini.js index 53892442a..c1f934253 100644 --- a/xlsx.mini.js +++ b/xlsx.mini.js @@ -7312,7 +7312,12 @@ function parse_dom_table(table, _opts) { } } if (cInRange.indexOf(C) !== -1) { - C = Math.max.apply(null, cInRange) + 1; + for (let i = 0; i < cInRange.length; i++) { + C += 1; + if (cInRange.indexOf(C) === -1) { + break; + } + } } /* TODO: figure out how to extract nonstandard mso- style */ CS = +elt.colSpan || 1;