diff --git a/papaparse.js b/papaparse.js index 1fd7dc4c..8448e197 100755 --- a/papaparse.js +++ b/papaparse.js @@ -1097,11 +1097,8 @@ License: MIT } var parserConfig = copy(_config); - if (_config.preview && _config.header) - parserConfig.preview++; // to compensate for header row - _input = input; - _parser = new Parser(parserConfig); + _parser = new Parser(parserConfig, _fields); _results = _parser.parse(_input, baseIndex, ignoreLastRow); processResults(); return _paused ? { meta: { paused: true } } : (_results || { meta: { paused: false } }); @@ -1178,8 +1175,9 @@ License: MIT }); } - if (needsHeaderRow()) - fillHeaderFields(); + if (needsHeaderRow() && _results.meta.fields) { + _fields = _fields.concat(_results.meta.fields); + } return applyHeaderAndDynamicTypingAndTransformation(); } @@ -1189,31 +1187,6 @@ License: MIT return _config.header && _fields.length === 0; } - function fillHeaderFields() - { - if (!_results) - return; - - function addHeader(header, i) - { - if (isFunction(_config.transformHeader)) - header = _config.transformHeader(header, i); - - _fields.push(header); - } - - if (Array.isArray(_results.data[0])) - { - for (var i = 0; needsHeaderRow() && i < _results.data.length; i++) - _results.data[i].forEach(addHeader); - - _results.data.splice(0, 1); - } - // if _results.data[0] is not an array, we are in a step where _results.data is the row. 
- else - _results.data.forEach(addHeader); - } - function shouldApplyDynamicTyping(field) { // Cache function values to avoid calling it for each row if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) { @@ -1316,7 +1289,7 @@ License: MIT delimiter: delim, newline: newline, preview: 10 - }).parse(input); + }, _fields).parse(input); for (var j = 0; j < preview.data.length; j++) { if (skipEmptyLines && testEmptyLine(preview.data[j])) { @@ -1402,7 +1375,7 @@ License: MIT } /** The core parser implements speedy and correct CSV parsing */ - function Parser(config) + function Parser(config, _fields) { // Unpack the config object config = config || {}; @@ -1444,6 +1417,7 @@ License: MIT // We're gonna need these at the Parser scope var cursor = 0; var aborted = false; + var fields = _fields; this.parse = function(input, baseIndex, ignoreLastRow) { @@ -1466,40 +1440,6 @@ License: MIT if (!input) return returnable(); - // Rename headers if there are duplicates - if (config.header && !baseIndex) - { - var firstLine = input.split(newline)[0]; - var headers = firstLine.split(delim); - var separator = '_'; - var headerMap = []; - var headerCount = {}; - var duplicateHeaders = false; - - for (var j in headers) { - var header = headers[j]; - if (isFunction(config.transformHeader)) - header = config.transformHeader(header, j); - var headerName = header; - - var count = headerCount[header] || 0; - if (count > 0) { - duplicateHeaders = true; - headerName = header + separator + count; - } - headerCount[header] = count + 1; - // In case it already exists, we add more separtors - while (headerMap.includes(headerName)) { - headerName = headerName + separator + count; - } - headerMap.push(headerName); - } - if (duplicateHeaders) { - var editedInput = input.split(newline); - editedInput[0] = headerMap.join(delim); - input = editedInput.join(newline); - } - } if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1)) { var rows = 
input.split(newline); @@ -1707,10 +1647,40 @@ License: MIT function pushRow(row) { - data.push(row); + if (config.header && (fields.length === 0)) + buildHeaders(row); + else + data.push(row); lastCursor = cursor; } + function buildHeaders(headers) { + // Build the field names from the header row: each header is passed through config.transformHeader (when it is a function), and a duplicate header gets '_<n>' appended, where n is the number of earlier occurrences. NOTE(review): for...in yields string keys, so transformHeader receives the index j as a string. + var separator = '_'; + var headerMap = []; + var headerCount = {}; + + for (var j in headers) { + var header = headers[j]; + if (isFunction(config.transformHeader)) + header = config.transformHeader(header, j); + var headerName = header; + + var count = headerCount[header] || 0; + if (count > 0) { + headerName = header + separator + count; + } + headerCount[header] = count + 1; + // If the resulting name is already taken, keep appending separator + count until it is unique + while (headerMap.includes(headerName)) { + headerName = headerName + separator + count; + } + headerMap.push(headerName); + } + + fields = headerMap; + } + /** * checks if there are extra spaces after closing quote and given index without any text * if Yes, returns the number of spaces @@ -1761,7 +1731,7 @@ License: MIT /** Returns an object with the results, errors, and meta. */ function returnable(stopped) { - return { + var results = { data: data, errors: errors, meta: { @@ -1772,6 +1742,11 @@ License: MIT cursor: lastCursor + (baseIndex || 0) } }; + + if (config.header && fields.length > 0) + results.meta.fields = fields.concat([]); + + return results; } /** Executes the user's step function and resets data & errors. 
*/ diff --git a/tests/test-cases.js b/tests/test-cases.js index 0243a2c5..9be9e531 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -591,8 +591,11 @@ var CORE_PARSER_TESTS = [ input: 'A,A,A,A\n1,2,3,4', config: { header: true }, expected: { - data: [['A', 'A_1', 'A_2', 'A_3'], ['1', '2', '3', '4']], - errors: [] + data: [['1', '2', '3', '4']], + errors: [], + meta: { + fields: ['A', 'A_1', 'A_2', 'A_3'] + } } }, { @@ -600,8 +603,11 @@ var CORE_PARSER_TESTS = [ input: 'A,A,A,A\n1,2,3,4', config: { header: true, transformHeader: function(header) { return header.toLowerCase(); } }, expected: { - data: [['a', 'a_1', 'a_2', 'a_3'], ['1', '2', '3', '4']], - errors: [] + data: [['1', '2', '3', '4']], + errors: [], + meta: { + fields: ['a', 'a_1', 'a_2', 'a_3'] + } } }, { @@ -609,8 +615,11 @@ var CORE_PARSER_TESTS = [ input: 'c,c,c,c_1\n1,2,3,4', config: { header: true }, expected: { - data: [['c', 'c_1', 'c_2', 'c_1_0'], ['1', '2', '3', '4']], - errors: [] + data: [['1', '2', '3', '4']], + errors: [], + meta: { + fields: ['c', 'c_1', 'c_2', 'c_1_0'] + } } }, ]; @@ -618,7 +627,7 @@ var CORE_PARSER_TESTS = [ describe('Core Parser Tests', function() { function generateTest(test) { (test.disabled ? 
it.skip : it)(test.description, function() { - var actual = new Papa.Parser(test.config).parse(test.input); + var actual = new Papa.Parser(test.config, []).parse(test.input); assert.deepEqual(actual.errors, test.expected.errors); assert.deepEqual(actual.data, test.expected.data); }); @@ -2674,6 +2683,45 @@ var CUSTOM_TESTS = [ }); } }, + { + description: "Pause and resume works with headers and duplicate fields (Regression Test for Bug #985)", + expected: [[ + ["Column 1", "Column 2", "Column 3", "Column 4"], + ["Column 1", "Column 2", "Column 3", "Column 4"], + ], [ + { "Column 1": "R1C1", "Column 2": "", "Column 3": "R1C3", "Column 4": "" }, + { "Column 1": "R2C1", "Column 2": "", "Column 3": "", "Column 4": "" }, + ]], + run: function(callback) { + var inputString = [ + "Column 1,Column 2,Column 3,Column 4", + "R1C1,,R1C3,", + "R2C1,,," + ].join("\n"); + var output = []; + var dataRows = []; + var headerResults = []; + Papa.parse(inputString, { + header: true, + step: function(results, parser) { + if (results) + { + headerResults.push(results.meta.fields); + parser.pause(); + parser.resume(); + if (results.data) { + dataRows.push(results.data); + } + } + }, + complete: function() { + output.push(headerResults); + output.push(dataRows); + callback(output); + } + }); + } + }, ]; describe('Custom Tests', function() {