From 27cc37738065f176e355d48cba6bccc9998e95b1 Mon Sep 17 00:00:00 2001 From: Michael Landis Date: Fri, 24 Mar 2023 10:05:59 -0700 Subject: [PATCH 1/7] Use Parser to generate _fields for headers --- papaparse.js | 120 +++++++++++++++++++------------------------- tests/test-cases.js | 65 +++++++++++++++++++++--- 2 files changed, 109 insertions(+), 76 deletions(-) diff --git a/papaparse.js b/papaparse.js index 1fd7dc4c..0dbddd45 100755 --- a/papaparse.js +++ b/papaparse.js @@ -1097,11 +1097,8 @@ License: MIT } var parserConfig = copy(_config); - if (_config.preview && _config.header) - parserConfig.preview++; // to compensate for header row - _input = input; - _parser = new Parser(parserConfig); + _parser = new Parser(parserConfig, _fields); _results = _parser.parse(_input, baseIndex, ignoreLastRow); processResults(); return _paused ? { meta: { paused: true } } : (_results || { meta: { paused: false } }); @@ -1178,8 +1175,9 @@ License: MIT }); } - if (needsHeaderRow()) - fillHeaderFields(); + if (needsHeaderRow() && _results.meta.fields) { + _fields = _fields.concat(_results.meta.fields); + } return applyHeaderAndDynamicTypingAndTransformation(); } @@ -1189,31 +1187,6 @@ License: MIT return _config.header && _fields.length === 0; } - function fillHeaderFields() - { - if (!_results) - return; - - function addHeader(header, i) - { - if (isFunction(_config.transformHeader)) - header = _config.transformHeader(header, i); - - _fields.push(header); - } - - if (Array.isArray(_results.data[0])) - { - for (var i = 0; needsHeaderRow() && i < _results.data.length; i++) - _results.data[i].forEach(addHeader); - - _results.data.splice(0, 1); - } - // if _results.data[0] is not an array, we are in a step where _results.data is the row. - else - _results.data.forEach(addHeader); - } - function shouldApplyDynamicTyping(field) { // Cache function values to avoid calling it for each row if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) { @@ -1316,7 +1289,7 @@ License: MIT delimiter: delim, newline: newline, preview: 10 - }).parse(input); + }, _fields).parse(input); for (var j = 0; j < preview.data.length; j++) { if (skipEmptyLines && testEmptyLine(preview.data[j])) { @@ -1402,7 +1375,7 @@ License: MIT } /** The core parser implements speedy and correct CSV parsing */ - function Parser(config) + function Parser(config, _fields) { // Unpack the config object config = config || {}; @@ -1444,6 +1417,7 @@ License: MIT // We're gonna need these at the Parser scope var cursor = 0; var aborted = false; + var fields = _fields; this.parse = function(input, baseIndex, ignoreLastRow) { @@ -1466,40 +1440,6 @@ License: MIT if (!input) return returnable(); - // Rename headers if there are duplicates - if (config.header && !baseIndex) - { - var firstLine = input.split(newline)[0]; - var headers = firstLine.split(delim); - var separator = '_'; - var headerMap = []; - var headerCount = {}; - var duplicateHeaders = false; - - for (var j in headers) { - var header = headers[j]; - if (isFunction(config.transformHeader)) - header = config.transformHeader(header, j); - var headerName = header; - - var count = headerCount[header] || 0; - if (count > 0) { - duplicateHeaders = true; - headerName = header + separator + count; - } - headerCount[header] = count + 1; - // In case it already exists, we add more separtors - while (headerMap.includes(headerName)) { - headerName = headerName + separator + count; - } - headerMap.push(headerName); - } - if (duplicateHeaders) { - var editedInput = input.split(newline); - editedInput[0] = headerMap.join(delim); - input = editedInput.join(newline); - } - } if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1)) { var rows = input.split(newline); @@ -1707,10 +1647,42 @@ License: MIT function pushRow(row) { - data.push(row); + if (config.header && (fields.length === 0)) + buildHeaders(row); + else + data.push(row); lastCursor = cursor; } + function buildHeaders(headers) { + // duplicate headers will have '_x" appended to them. + var separator = '_'; + var headerMap = []; + var headerCount = {}; + + for (var j in headers) { + var header = headers[j]; + if (isFunction(config.transformHeader)) + header = config.transformHeader(header, j); + var headerName = header; + + var count = headerCount[header] || 0; + if (count > 0) { + headerName = header + separator + count; + } + headerCount[header] = count + 1; + // In case it already exists, we add more separtors + while (headerMap.includes(headerName)) { + headerName = headerName + separator + count; + } + headerMap.push(headerName); + } + + fields = headerMap; + + return headerMap; + } + /** * checks if there are extra spaces after closing quote and given index without any text * if Yes, returns the number of spaces @@ -1761,7 +1733,7 @@ License: MIT /** Returns an object with the results, errors, and meta. */ function returnable(stopped) { - return { + var results = { data: data, errors: errors, meta: { @@ -1772,6 +1744,16 @@ License: MIT cursor: lastCursor + (baseIndex || 0) } }; + + if (config.header && fields && fields.length > 0) { + var copiedFields = []; // prevent user from mutating internal state + for (var i = 0; i < fields.length; i++) { + copiedFields[i] = fields[i]; + } + results.meta.fields = copiedFields; + } + + return results; } /** Executes the user's step function and resets data & errors. */ diff --git a/tests/test-cases.js b/tests/test-cases.js index 0243a2c5..e6afeb51 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -591,8 +591,11 @@ var CORE_PARSER_TESTS = [ input: 'A,A,A,A\n1,2,3,4', config: { header: true }, expected: { - data: [['A', 'A_1', 'A_2', 'A_3'], ['1', '2', '3', '4']], - errors: [] + data: [['1', '2', '3', '4']], + errors: [], + meta: { + fields: ['A', 'A_1', 'A_2', 'A_3'] + } } }, { @@ -600,8 +603,11 @@ var CORE_PARSER_TESTS = [ input: 'A,A,A,A\n1,2,3,4', config: { header: true, transformHeader: function(header) { return header.toLowerCase(); } }, expected: { - data: [['a', 'a_1', 'a_2', 'a_3'], ['1', '2', '3', '4']], - errors: [] + data: [['1', '2', '3', '4']], + errors: [], + meta: { + fields: ['a', 'a_1', 'a_2', 'a_3'] + } } }, { @@ -609,8 +615,11 @@ var CORE_PARSER_TESTS = [ input: 'c,c,c,c_1\n1,2,3,4', config: { header: true }, expected: { - data: [['c', 'c_1', 'c_2', 'c_1_0'], ['1', '2', '3', '4']], - errors: [] + data: [['1', '2', '3', '4']], + errors: [], + meta: { + fields: ['c', 'c_1', 'c_2', 'c_1_0'] + } } }, ]; @@ -618,7 +627,7 @@ var CORE_PARSER_TESTS = [ describe('Core Parser Tests', function() { function generateTest(test) { (test.disabled ? it.skip : it)(test.description, function() { - var actual = new Papa.Parser(test.config).parse(test.input); + var actual = new Papa.Parser(test.config, []).parse(test.input); assert.deepEqual(actual.errors, test.expected.errors); assert.deepEqual(actual.data, test.expected.data); }); @@ -2018,6 +2027,48 @@ describe('Unparse Tests', function() { var CUSTOM_TESTS = [ + { + description: "Pause and resume works with headers and duplicate fields (Regression Test for Bug #985)", + expected: [["Column 1", "Column 2", "Column 3", "Column 4", "Column 5"], [ + { "Column 1": "R1C1", "Column 2": "", "Column 3": "R1C3", "Column 4": "", "Column 5": "" }, + { "Column 1": "R2C1", "Column 2": "", "Column 3": "", "Column 4": "", "Column 5": "" }, + { "Column 1": "R3C1", "Column 2": "", "Column 3": "", "Column 4": "R3C4", "Column 5": "" }, + { "Column 1": "R4C1", "Column 2": "", "Column 3": "", "Column 4": "", "Column 5": "" }, + ]], + run: function(callback) { + var inputString = [ + "Column 1,Column 2,Column 3,Column 4,Column 5", + "R1C1,,R1C3,,", + "R2C1,,,,", + "R3C1,,,R3C4,", + "R4C1,,,," + ].join("\n"); + var output = []; + var dataRows = []; + var headers = null; + Papa.parse(inputString, { + header: true, + step: function(results, parser) { + if (results) + { + if (!headers) { + headers = results.meta.fields; + } + parser.pause(); + parser.resume(); + if (results.data) { + dataRows.push(results.data); + } + } + }, + complete: function() { + output.push(headers); + output.push(dataRows); + callback(output); + } + }); + } + }, { description: "Pause and resume works (Regression Test for Bug #636)", disabled: !XHR_ENABLED, From 86a6d271a2626a2ad55b8383494f8877ab32236c Mon Sep 17 00:00:00 2001 From: Michael Landis Date: Fri, 24 Mar 2023 10:31:45 -0700 Subject: [PATCH 2/7] Update test to ensure fields are sent every time --- tests/test-cases.js | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/test-cases.js b/tests/test-cases.js index e6afeb51..b2b36b74 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -2029,7 +2029,12 @@ describe('Unparse Tests', function() { var CUSTOM_TESTS = [ { description: "Pause and resume works with headers and duplicate fields (Regression Test for Bug #985)", - expected: [["Column 1", "Column 2", "Column 3", "Column 4", "Column 5"], [ + expected: [[ + ["Column 1", "Column 2", "Column 3", "Column 4", "Column 5"], + ["Column 1", "Column 2", "Column 3", "Column 4", "Column 5"], + ["Column 1", "Column 2", "Column 3", "Column 4", "Column 5"], + ["Column 1", "Column 2", "Column 3", "Column 4", "Column 5"] + ], [ { "Column 1": "R1C1", "Column 2": "", "Column 3": "R1C3", "Column 4": "", "Column 5": "" }, { "Column 1": "R2C1", "Column 2": "", "Column 3": "", "Column 4": "", "Column 5": "" }, { "Column 1": "R3C1", "Column 2": "", "Column 3": "", "Column 4": "R3C4", "Column 5": "" }, @@ -2045,15 +2050,13 @@ var CUSTOM_TESTS = [ ].join("\n"); var output = []; var dataRows = []; - var headers = null; + var headerResults = []; Papa.parse(inputString, { header: true, step: function(results, parser) { if (results) { - if (!headers) { - headers = results.meta.fields; - } + headerResults.push(results.meta.fields); parser.pause(); parser.resume(); if (results.data) { @@ -2062,7 +2065,7 @@ var CUSTOM_TESTS = [ } }, complete: function() { - output.push(headers); + output.push(headerResults); output.push(dataRows); callback(output); } From b84183abf6e19860ef0d5e7ff844313f0a2b79b9 Mon Sep 17 00:00:00 2001 From: Michael Landis Date: Fri, 24 Mar 2023 10:38:00 -0700 Subject: [PATCH 3/7] Clean up extraneous return statement in buildHeaders --- papaparse.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/papaparse.js b/papaparse.js index 0dbddd45..e224e365 100755 --- a/papaparse.js +++ b/papaparse.js @@ -1679,8 +1679,6 @@ License: MIT } fields = headerMap; - - return headerMap; } /** From a151558c64492fc5ade1d709268aeed8e36194ea Mon Sep 17 00:00:00 2001 From: Michael Landis Date: Fri, 24 Mar 2023 10:40:10 -0700 Subject: [PATCH 4/7] Remove extraneous fields check from returnable --- papaparse.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/papaparse.js b/papaparse.js index e224e365..a54a7135 100755 --- a/papaparse.js +++ b/papaparse.js @@ -1743,7 +1743,7 @@ License: MIT } }; - if (config.header && fields && fields.length > 0) { + if (config.header && fields.length > 0) { var copiedFields = []; // prevent user from mutating internal state for (var i = 0; i < fields.length; i++) { copiedFields[i] = fields[i]; From 569c262d5a05ae54364255329138560708bcf0d2 Mon Sep 17 00:00:00 2001 From: Michael Landis Date: Fri, 24 Mar 2023 10:43:15 -0700 Subject: [PATCH 5/7] Clean up copying of fields to results I *think* this is the last commit I have. Sigh. --- papaparse.js | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/papaparse.js b/papaparse.js index a54a7135..8448e197 100755 --- a/papaparse.js +++ b/papaparse.js @@ -1743,13 +1743,8 @@ License: MIT } }; - if (config.header && fields.length > 0) { - var copiedFields = []; // prevent user from mutating internal state - for (var i = 0; i < fields.length; i++) { - copiedFields[i] = fields[i]; - } - results.meta.fields = copiedFields; - } + if (config.header && fields.length > 0) + results.meta.fields = fields.concat([]); return results; } From 80a8a929d1124f074daee1aaecc05afa328abc1d Mon Sep 17 00:00:00 2001 From: Michael Landis Date: Fri, 21 Apr 2023 09:43:28 -0700 Subject: [PATCH 6/7] Moved new test to bottom, reduced test data size --- tests/test-cases.js | 84 +++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 45 deletions(-) diff --git a/tests/test-cases.js b/tests/test-cases.js index b2b36b74..3bb805ba 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -2027,51 +2027,6 @@ describe('Unparse Tests', function() { var CUSTOM_TESTS = [ - { - description: "Pause and resume works with headers and duplicate fields (Regression Test for Bug #985)", - expected: [[ - ["Column 1", "Column 2", "Column 3", "Column 4", "Column 5"], - ["Column 1", "Column 2", "Column 3", "Column 4", "Column 5"], - ["Column 1", "Column 2", "Column 3", "Column 4", "Column 5"], - ["Column 1", "Column 2", "Column 3", "Column 4", "Column 5"] - ], [ - { "Column 1": "R1C1", "Column 2": "", "Column 3": "R1C3", "Column 4": "", "Column 5": "" }, - { "Column 1": "R2C1", "Column 2": "", "Column 3": "", "Column 4": "", "Column 5": "" }, - { "Column 1": "R3C1", "Column 2": "", "Column 3": "", "Column 4": "R3C4", "Column 5": "" }, - { "Column 1": "R4C1", "Column 2": "", "Column 3": "", "Column 4": "", "Column 5": "" }, - ]], - run: function(callback) { - var inputString = [ - "Column 1,Column 2,Column 3,Column 4,Column 5", - "R1C1,,R1C3,,", - "R2C1,,,,", - "R3C1,,,R3C4,", - "R4C1,,,," - ].join("\n"); - var output = []; - var dataRows = []; - var headerResults = []; - Papa.parse(inputString, { - header: true, - step: function(results, parser) { - if (results) - { - headerResults.push(results.meta.fields); - parser.pause(); - parser.resume(); - if (results.data) { - dataRows.push(results.data); - } - } - }, - complete: function() { - output.push(headerResults); - output.push(dataRows); - callback(output); - } - }); - } - }, { description: "Pause and resume works (Regression Test for Bug #636)", disabled: !XHR_ENABLED, @@ -2728,6 +2683,45 @@ var CUSTOM_TESTS = [ }); } }, + { + description: "Pause and resume works with headers and duplicate fields (Regression Test for Bug #985)", + expected: [[ + ["Column 1", "Column 2", "Column 3"], + ["Column 1", "Column 2", "Column 3"], + ], [ + { "Column 1": "R1C1", "Column 2": "", "Column 3": "R1C3" }, + { "Column 1": "R2C1", "Column 2": "", "Column 3": "" }, + ]], + run: function(callback) { + var inputString = [ + "Column 1,Column 2,Column 3", + "R1C1,,R1C3", + "R2C1,," + ].join("\n"); + var output = []; + var dataRows = []; + var headerResults = []; + Papa.parse(inputString, { + header: true, + step: function(results, parser) { + if (results) + { + headerResults.push(results.meta.fields); + parser.pause(); + parser.resume(); + if (results.data) { + dataRows.push(results.data); + } + } + }, + complete: function() { + output.push(headerResults); + output.push(dataRows); + callback(output); + } + }); + } + }, ]; describe('Custom Tests', function() { From 34fc5a0ab628d9f5fce89c18ec454f32603ab3a8 Mon Sep 17 00:00:00 2001 From: Michael Landis Date: Fri, 21 Apr 2023 09:47:22 -0700 Subject: [PATCH 7/7] Added a column back into the test This lets us test duplicates in first row --- tests/test-cases.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test-cases.js b/tests/test-cases.js index 3bb805ba..9be9e531 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -2686,17 +2686,17 @@ var CUSTOM_TESTS = [ { description: "Pause and resume works with headers and duplicate fields (Regression Test for Bug #985)", expected: [[ - ["Column 1", "Column 2", "Column 3"], - ["Column 1", "Column 2", "Column 3"], + ["Column 1", "Column 2", "Column 3", "Column 4"], + ["Column 1", "Column 2", "Column 3", "Column 4"], ], [ - { "Column 1": "R1C1", "Column 2": "", "Column 3": "R1C3" }, - { "Column 1": "R2C1", "Column 2": "", "Column 3": "" }, + { "Column 1": "R1C1", "Column 2": "", "Column 3": "R1C3", "Column 4": "" }, + { "Column 1": "R2C1", "Column 2": "", "Column 3": "", "Column 4": "" }, ]], run: function(callback) { var inputString = [ - "Column 1,Column 2,Column 3", - "R1C1,,R1C3", - "R2C1,," + "Column 1,Column 2,Column 3,Column 4", + "R1C1,,R1C3,", + "R2C1,,," ].join("\n"); var output = []; var dataRows = [];