From 9a122a8be2272851a794cf8a56a97244eb90e7a6 Mon Sep 17 00:00:00 2001
From: Stuart Marshall <stuart.marshall@convoy.com>
Date: Sun, 20 Aug 2023 16:30:42 -0700
Subject: [PATCH 1/3] Handle header line expansion for all parser modes

---
 papaparse.js | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/papaparse.js b/papaparse.js
index 3ce39621..ebbfbc60 100755
--- a/papaparse.js
+++ b/papaparse.js
@@ -1463,7 +1463,7 @@ License: MIT
 
 			// Establish starting state
 			cursor = 0;
-			var data = [], errors = [], row = [], lastCursor = 0;
+			var data = [], errors = [], row = [], lastCursor = 0, inputExpansion = 0;
 
 			if (!input)
 				return returnable();
@@ -1508,6 +1508,10 @@ License: MIT
 				if (duplicateHeaders) {
 					var editedInput = input.split(newline);
 					editedInput[0] = Array.from(headerMap).join(delim);
+					// If we expanded the input due to duplicate headers then reduce cursor
+					// by the amount we expanded the input.
+					// This is needed for keeping leftover aggregate in parseChunk.
+					inputExpansion = editedInput[0].length - firstLine.length;
 					input = editedInput.join(newline);
 				}
 			}
@@ -1517,12 +1521,7 @@ License: MIT
 				for (var i = 0; i < rows.length; i++)
 				{
 					row = rows[i];
-					// use firstline as row length may be changed due to duplicated headers
-					if (i === 0 && firstLine !== undefined) {
-						cursor += firstLine.length;
-					}else{
-						cursor += row.length;
-					}
+					cursor += row.length;
 					if (i !== rows.length - 1)
 						cursor += newline.length;
 					else if (ignoreLastRow)
@@ -1724,7 +1723,7 @@ License: MIT
 			function pushRow(row)
 			{
 				data.push(row);
-				lastCursor = cursor;
+				lastCursor = cursor - inputExpansion;
 			}
 
 			/**

From 0136a53c0375208f4472f7455ce7f5472714c536 Mon Sep 17 00:00:00 2001
From: Stuart Marshall <stuart.marshall@convoy.com>
Date: Sun, 20 Aug 2023 23:36:12 -0700
Subject: [PATCH 2/3] Add test for chunked parsing with duplicate header

---
 tests/node-tests.js | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/tests/node-tests.js b/tests/node-tests.js
index cad8058e..b4bbd500 100644
--- a/tests/node-tests.js
+++ b/tests/node-tests.js
@@ -164,6 +164,32 @@ describe('PapaParse', function() {
 		});
 	});
 
+	it('Checks cursor when file is large and has duplicate headers', function(done) {
+		this.timeout(30000);
+		var stepped = 0;
+		var startsWithEtiamOrLorem = true;
+		Papa.parse(fs.createReadStream(__dirname + '/verylong-sample.csv'), {
+			header: true,
+			transformHeader: function(headerName) {
+				return headerName === 'meaning of life' ? 'placeholder' : headerName;
+			},
+			step: function(results, parser) {
+				stepped++;
+				if (results)
+				{
+					if (stepped > 1) {
+						const startsWithEtiam = results.data && results.data.placeholder && results.data.placeholder.startsWith("Etiam");
+						const startsWithLorem = results.data && results.data.placeholder && results.data.placeholder.startsWith("Lorem");
+						startsWithEtiamOrLorem = startsWithEtiamOrLorem && (startsWithEtiam || startsWithLorem);
+					}
+				}
+			},
+			complete: function() {
+				assert(startsWithEtiamOrLorem);
+				done();
+			}
+		});
+	});
 
 	it('piped streaming CSV should be correctly parsed when header is true', function(done) {
 		var data = [];

From ad773e8ecdd42c552138da31547d82bbad086347 Mon Sep 17 00:00:00 2001
From: Stuart Marshall <stuart.marshall@convoy.com>
Date: Mon, 21 Aug 2023 15:00:18 -0700
Subject: [PATCH 3/3] Add test for trailing quote with renamed headers. Adjust
 code comment.

---
 papaparse.js        | 16 +++++++++++-----
 tests/node-tests.js | 14 ++++++++++++++
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/papaparse.js b/papaparse.js
index ebbfbc60..45a094ac 100755
--- a/papaparse.js
+++ b/papaparse.js
@@ -1508,11 +1508,17 @@ License: MIT
 				if (duplicateHeaders) {
 					var editedInput = input.split(newline);
 					editedInput[0] = Array.from(headerMap).join(delim);
-					// If we expanded the input due to duplicate headers then reduce cursor
-					// by the amount we expanded the input.
-					// This is needed for keeping leftover aggregate in parseChunk.
+					// If we change the size of the input due to duplicate headers
+					// or header renaming from transformHeader, then we need to
+					// record the difference so that we can adjust the `meta.cursor`
+					// value of the `parse` result accordingly.
+					// This is because the consumers of this method (e.g. ChunkStreamer)
+					// use the resulting `cursor` value to know how much of the input was
+					// consumed by the parser and are not aware of the parser implementation
+					// details for handling duplicate headers.
 					inputExpansion = editedInput[0].length - firstLine.length;
 					input = editedInput.join(newline);
+					inputLen = input.length;
 				}
 			}
 			if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1))
@@ -1723,7 +1729,7 @@ License: MIT
 			function pushRow(row)
 			{
 				data.push(row);
-				lastCursor = cursor - inputExpansion;
+				lastCursor = cursor;
 			}
 
 			/**
@@ -1784,7 +1790,7 @@ License: MIT
 						linebreak: newline,
 						aborted: aborted,
 						truncated: !!stopped,
-						cursor: lastCursor + (baseIndex || 0),
+						cursor: lastCursor + (baseIndex || 0) - inputExpansion,
 						renamedHeaders: renamedHeaders
 					}
 				};
diff --git a/tests/node-tests.js b/tests/node-tests.js
index b4bbd500..9fcbfe72 100644
--- a/tests/node-tests.js
+++ b/tests/node-tests.js
@@ -191,6 +191,20 @@ describe('PapaParse', function() {
 		});
 	});
 
+	it('Handles quote at EOF when headers are modified', function(done) {
+		var data = [];
+		Papa.parse('field1,field1,field3\na,b,c\nd,e,"f"', {
+			header: true,
+			step: function(results) {
+				data.push(results.data);
+			},
+			complete: function() {
+				assert.deepEqual(data, [{ field1: 'a', field1_1: 'b', field3: 'c' },{ field1: 'd', field1_1: 'e', field3: 'f' }]);
+				done();
+			}
+		});
+	});
+
 	it('piped streaming CSV should be correctly parsed when header is true', function(done) {
 		var data = [];
 		var readStream = fs.createReadStream(__dirname + '/sample-header.csv', 'utf8');