mholt · stuart-marshall · Aug 20, 2023 · Aug 21, 2023 · Aug 21, 2023 · Aug 22, 2023
diff --git a/papaparse.js b/papaparse.js
@@ -1463,7 +1463,7 @@ License: MIT
 
 			// Establish starting state
 			cursor = 0;
-			var data = [], errors = [], row = [], lastCursor = 0;
+			var data = [], errors = [], row = [], lastCursor = 0, inputExpansion = 0;
 
 			if (!input)
 				return returnable();
@@ -1508,7 +1508,17 @@ License: MIT
 				if (duplicateHeaders) {
 					var editedInput = input.split(newline);
 					editedInput[0] = Array.from(headerMap).join(delim);
+					// Rewriting the header row (to de-duplicate headers, possibly after
+					// renaming by transformHeader) can change the length of the input.
+					// Record the difference so that it can be subtracted from the
+					// `meta.cursor` value of the `parse` result.
+					// This is because the consumers of this method (e.g. ChunkStreamer)
+					// use the resulting `cursor` value to know how much of the original
+					// input was consumed by the parser, and they are not aware of how the
+					// parser handles duplicate headers internally.
+					inputExpansion = editedInput[0].length - firstLine.length;
 					input = editedInput.join(newline);
+					inputLen = input.length;
 				}
 			}
 			if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1))
@@ -1517,12 +1527,7 @@ License: MIT
 				for (var i = 0; i < rows.length; i++)
 				{
 					row = rows[i];
-					// use firstline as row length may be changed due to duplicated headers
-					if (i === 0 && firstLine !== undefined) {
-						cursor += firstLine.length;
-					}else{
-						cursor += row.length;
-					}
+					cursor += row.length;
 					if (i !== rows.length - 1)
 						cursor += newline.length;
 					else if (ignoreLastRow)
@@ -1785,7 +1790,7 @@ License: MIT
 						linebreak: newline,
 						aborted: aborted,
 						truncated: !!stopped,
-						cursor: lastCursor + (baseIndex || 0),
+						cursor: lastCursor + (baseIndex || 0) - inputExpansion,
 						renamedHeaders: renamedHeaders
 					}
 				};

diff --git a/tests/node-tests.js b/tests/node-tests.js
@@ -164,6 +164,46 @@ describe('PapaParse', function() {
 		});
 	});
 
+	it('Checks cursor when file is large and has duplicate headers', function(done) { // streams verylong-sample.csv with one header renamed by transformHeader
+		this.timeout(30000); // raised timeout (presumably milliseconds, as in Mocha) for the large fixture
+		var stepped = 0; // number of step callbacks seen so far
+		var startsWithEtiamOrLorem = true; // stays truthy only while every checked row passes
+		Papa.parse(fs.createReadStream(__dirname + '/verylong-sample.csv'), {
+			header: true,
+			transformHeader: function(headerName) {
+				return headerName === 'meaning of life' ? 'placeholder' : headerName; // rename exactly one column
+			},
+			step: function(results, parser) {
+				stepped++;
+				if (results)
+				{
+					if (stepped > 1) { // the first step call is not checked; presumably its value differs -- verify against the fixture
+						const startsWithEtiam = results.data && results.data.placeholder && results.data.placeholder.startsWith("Etiam");
+						const startsWithLorem = results.data && results.data.placeholder && results.data.placeholder.startsWith("Lorem");
+						startsWithEtiamOrLorem = startsWithEtiamOrLorem && (startsWithEtiam || startsWithLorem); // a missing or differently-prefixed value makes this falsy
+					}
+				}
+			},
+			complete: function() {
+				assert(startsWithEtiamOrLorem); // fails if any checked row lacked the expected `placeholder` prefix
+				done();
+			}
+		});
+	});
+
+	it('Handles quote at EOF when headers are modified', function(done) { // header row repeats 'field1'; the final field is quoted and ends the input
+		var data = []; // rows collected from each step callback
+		Papa.parse('field1,field1,field3\na,b,c\nd,e,"f"', {
+			header: true,
+			step: function(results) {
+				data.push(results.data);
+			},
+			complete: function() { // the second 'field1' column is expected under the key 'field1_1'
+				assert.deepEqual(data, [{ field1: 'a', field1_1: 'b', field3: 'c' },{ field1: 'd', field1_1: 'e', field3: 'f' }]);
+				done();
+			}
+		});
+	});
 
 	it('piped streaming CSV should be correctly parsed when header is true', function(done) {
 		var data = [];