diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 8a822954ac421..554008f6d9d76 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -1732,6 +1732,7 @@ class PartialEvaluator { const stateManager = new StateManager(initialState); const preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager); const timeSlotManager = new TimeSlotManager(); + let markedContentLevel = 0; function closePendingRestoreOPS(argument) { for (let i = 0, ii = preprocessor.savedStatesDepth; i < ii; i++) { @@ -1753,7 +1754,7 @@ class PartialEvaluator { timeSlotManager.reset(); const operation = {}; - let stop, i, ii, cs, name, isValidName; + let stop, cs, name, isValidName; while (!(stop = timeSlotManager.check())) { // The arguments parsed by read() are used beyond this loop, so we // cannot reuse the same array on each iteration. Therefore we pass @@ -1914,6 +1915,12 @@ class PartialEvaluator { break; case OPS.endText: parsingText = false; + if (markedContentLevel !== 0) { + for (let i = 0; i < markedContentLevel; i++) { + operatorList.addOp(OPS.endMarkedContent, []); + } + markedContentLevel = 0; + } break; case OPS.endInlineImage: var cacheKey = args[0].cacheKey; @@ -2227,6 +2234,7 @@ class PartialEvaluator { // but doing so is meaningless without knowing the semantics. continue; case OPS.beginMarkedContentProps: + markedContentLevel++; if (!(args[0] instanceof Name)) { warn(`Expected name for beginMarkedContentProps arg0=${args[0]}`); operatorList.addOp(OPS.beginMarkedContentProps, ["OC", null]); @@ -2269,21 +2277,26 @@ class PartialEvaluator { break; case OPS.beginMarkedContent: + markedContentLevel++; + if (args?.some(arg => arg instanceof Dict)) { + warn(`getOperatorList - ignoring operator: ${fn}`); + continue; + } + break; case OPS.endMarkedContent: + markedContentLevel--; + if (args?.some(arg => arg instanceof Dict)) { + warn(`getOperatorList - ignoring operator: ${fn}`); + continue; + } + break; default: // Note: Ignore the operator if it has `Dict` arguments, since // those are non-serializable, otherwise postMessage will throw // "An object could not be cloned.". - if (args !== null) { - for (i = 0, ii = args.length; i < ii; i++) { - if (args[i] instanceof Dict) { - break; - } - } - if (i < ii) { - warn("getOperatorList - ignoring operator: " + fn); - continue; - } + if (args?.some(arg => arg instanceof Dict)) { + warn(`getOperatorList - ignoring operator: ${fn}`); + continue; } } operatorList.addOp(fn, args); @@ -3142,6 +3155,19 @@ class PartialEvaluator { textState.textMatrix = IDENTITY_MATRIX.slice(); textState.textLineMatrix = IDENTITY_MATRIX.slice(); break; + case OPS.endText: + if (includeMarkedContent) { + if (markedContentData.level !== 0) { + flushTextContentItem(); + for (let i = 0; i < markedContentData.level; i++) { + textContent.items.push({ + type: "endMarkedContent", + }); + } + markedContentData.level = 0; + } + } + break; case OPS.showSpacedText: if (!stateManager.state.font) { self.ensureStateFont(stateManager.state); diff --git a/test/integration/text_layer_spec.mjs b/test/integration/text_layer_spec.mjs index 1fd8296b808ba..f3b0b994b7cf0 100644 --- a/test/integration/text_layer_spec.mjs +++ b/test/integration/text_layer_spec.mjs @@ -290,4 +290,30 @@ describe("Text layer", () => { }); }); }); + + describe("check the dom depth (bug 1898053) ", () => { + let pages; + + beforeAll(async () => { + pages = await loadAndWait( + "bug1898053_minimal.pdf", + ".textLayer .endOfContent" + ); + }); + afterAll(async () => { + await closePages(pages); + }); + + it("must have the right number of children", async () => { + await Promise.all( + pages.map(async ([browserName, page]) => { + const count = await page.evaluate( + () => + document.querySelectorAll(".textLayer > .markedContent").length + ); + expect(count).toBe(6); + }) + ); + }); + }); }); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 733a26e08e12a..059b9c040c852 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -649,3 +649,4 @@ !tracemonkey_freetext.pdf !issue17998.pdf !pdfjs_wikipedia.pdf +!bug1898053_minimal.pdf diff --git a/test/pdfs/bug1898053_minimal.pdf b/test/pdfs/bug1898053_minimal.pdf new file mode 100755 index 0000000000000..5c0586aa0477d Binary files /dev/null and b/test/pdfs/bug1898053_minimal.pdf differ