Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More fix cursor for duplicate header #1

Merged
merged 3 commits into from
Aug 22, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions papaparse.js
Original file line number Diff line number Diff line change
@@ -1463,7 +1463,7 @@ License: MIT

// Establish starting state
cursor = 0;
var data = [], errors = [], row = [], lastCursor = 0;
var data = [], errors = [], row = [], lastCursor = 0, inputExpansion = 0;

if (!input)
return returnable();
@@ -1508,7 +1508,17 @@ License: MIT
if (duplicateHeaders) {
var editedInput = input.split(newline);
editedInput[0] = Array.from(headerMap).join(delim);
// If the size of the input changes because of duplicate headers
// or header renaming from transformHeader, we need to record the
// difference so that we can adjust the cursor accordingly in the
// `meta.cursor` value of the `parse` result.
// This is because the consumers of this method (e.g. ChunkStreamer)
// use the resulting `cursor` value to know how much of the input was
// consumed by the parser, and they are not aware of the parser's
// implementation details for handling duplicate headers.
inputExpansion = editedInput[0].length - firstLine.length;
input = editedInput.join(newline);
inputLen = input.length;
}
}
if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1))
@@ -1517,12 +1527,7 @@ License: MIT
for (var i = 0; i < rows.length; i++)
{
row = rows[i];
// use firstline as row length may be changed due to duplicated headers
if (i === 0 && firstLine !== undefined) {
cursor += firstLine.length;
}else{
cursor += row.length;
}
cursor += row.length;
if (i !== rows.length - 1)
cursor += newline.length;
else if (ignoreLastRow)
@@ -1785,7 +1790,7 @@ License: MIT
linebreak: newline,
aborted: aborted,
truncated: !!stopped,
cursor: lastCursor + (baseIndex || 0),
cursor: lastCursor + (baseIndex || 0) - inputExpansion,
renamedHeaders: renamedHeaders
}
};
40 changes: 40 additions & 0 deletions tests/node-tests.js
Original file line number Diff line number Diff line change
@@ -164,6 +164,46 @@ describe('PapaParse', function() {
});
});

// Streams the large sample file with the 'meaning of life' header renamed to
// 'placeholder', and verifies that every row seen after the first step has a
// 'placeholder' value beginning with "Etiam" or "Lorem".
it('Checks cursor when file is large and has duplicate headers', function(done) {
	this.timeout(30000);
	var stepCount = 0;
	var everyRowMatches = true;

	// Renames only the 'meaning of life' column; all other headers pass through.
	function renameHeader(headerName) {
		if (headerName === 'meaning of life')
			return 'placeholder';
		return headerName;
	}

	// Counts every step; rows from the second step onward are checked.
	function onStep(results, parser) {
		stepCount++;
		if (!results || stepCount <= 1)
			return;
		var cell = results.data && results.data.placeholder;
		var hasExpectedPrefix = cell && (cell.startsWith("Etiam") || cell.startsWith("Lorem"));
		everyRowMatches = everyRowMatches && hasExpectedPrefix;
	}

	function onComplete() {
		assert(everyRowMatches);
		done();
	}

	Papa.parse(fs.createReadStream(__dirname + '/verylong-sample.csv'), {
		header: true,
		transformHeader: renameHeader,
		step: onStep,
		complete: onComplete
	});
});

// Parses a small CSV whose header repeats 'field1' and whose last field is
// quoted at the very end of the input, collecting each stepped row and
// comparing the collection against the expected objects.
it('Handles quote at EOF when headers are modified', function(done) {
	var csv = 'field1,field1,field3\na,b,c\nd,e,"f"';
	var expectedRows = [
		{ field1: 'a', field1_1: 'b', field3: 'c' },
		{ field1: 'd', field1_1: 'e', field3: 'f' }
	];
	var collected = [];

	Papa.parse(csv, {
		header: true,
		step: function(results) {
			collected.push(results.data);
		},
		complete: function() {
			assert.deepEqual(collected, expectedRows);
			done();
		}
	});
});

it('piped streaming CSV should be correctly parsed when header is true', function(done) {
var data = [];