Skip to content

Commit

Permalink
Avoid re-parsing global images that failed decoding (issue 18042, PR …
Browse files Browse the repository at this point in the history
…17428 follow-up)

For images that failed to decode once we want to avoid a pointless round-trip to the main-thread, which could otherwise happen for globally cached images.
  • Loading branch information
Snuffleupagus committed May 6, 2024
1 parent 1b811ac commit 6f821db
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 23 deletions.
47 changes: 31 additions & 16 deletions src/core/evaluator.js
Original file line number Diff line number Diff line change
Expand Up @@ -797,30 +797,42 @@ class PartialEvaluator {
args = [objId, w, h];
operatorList.addImageOps(OPS.paintImageXObject, args, optionalContent);

// For large (at least 500x500) or more complex images that we'll cache
// globally, check if the image is still cached locally on the main-thread
// to avoid having to re-parse the image (since that can be slow).
if (
cacheGlobally &&
(w * h > 250000 || dict.has("SMask") || dict.has("Mask"))
) {
const localLength = await this.handler.sendWithPromise("commonobj", [
objId,
"CopyLocalImage",
{ imageRef },
]);

if (localLength) {
if (cacheGlobally) {
if (this.globalImageCache.hasDecodeFailed(imageRef)) {
this.globalImageCache.setData(imageRef, {
objId,
fn: OPS.paintImageXObject,
args,
optionalContent,
byteSize: 0, // Temporary entry, to avoid `setData` returning early.
byteSize: 0, // Data is `null`, since decoding failed previously.
});
this.globalImageCache.addByteSize(imageRef, localLength);

this._sendImgData(objId, /* imgData = */ null, cacheGlobally);
return;
}

// For large (at least 500x500) or more complex images that we'll cache
// globally, check if the image is still cached locally on the main-thread
// to avoid having to re-parse the image (since that can be slow).
if (w * h > 250000 || dict.has("SMask") || dict.has("Mask")) {
const localLength = await this.handler.sendWithPromise("commonobj", [
objId,
"CopyLocalImage",
{ imageRef },
]);

if (localLength) {
this.globalImageCache.setData(imageRef, {
objId,
fn: OPS.paintImageXObject,
args,
optionalContent,
byteSize: 0, // Temporary entry, to avoid `setData` returning early.
});
this.globalImageCache.addByteSize(imageRef, localLength);
return;
}
}
}

PDFImage.buildImage({
Expand Down Expand Up @@ -850,6 +862,9 @@ class PartialEvaluator {
.catch(reason => {
warn(`Unable to decode image "${objId}": "${reason}".`);

if (imageRef) {
this.globalImageCache.addDecodeFailed(imageRef);
}
return this._sendImgData(objId, /* imgData = */ null, cacheGlobally);
});

Expand Down
23 changes: 17 additions & 6 deletions src/core/image_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import {
unreachable,
warn,
} from "../shared/util.js";
import { RefSetCache } from "./primitives.js";
import { RefSet, RefSetCache } from "./primitives.js";

class BaseLocalCache {
constructor(options) {
Expand Down Expand Up @@ -178,6 +178,8 @@ class GlobalImageCache {

static MAX_BYTE_SIZE = 5 * MAX_IMAGE_SIZE_TO_CACHE;

#decodeFailedSet = new RefSet();

constructor() {
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
assert(
Expand All @@ -189,19 +191,19 @@ class GlobalImageCache {
this._imageCache = new RefSetCache();
}

get _byteSize() {
get #byteSize() {
let byteSize = 0;
for (const imageData of this._imageCache) {
byteSize += imageData.byteSize;
}
return byteSize;
}

get _cacheLimitReached() {
get #cacheLimitReached() {
if (this._imageCache.size < GlobalImageCache.MIN_IMAGES_TO_CACHE) {
return false;
}
if (this._byteSize < GlobalImageCache.MAX_BYTE_SIZE) {
if (this.#byteSize < GlobalImageCache.MAX_BYTE_SIZE) {
return false;
}
return true;
Expand All @@ -218,12 +220,20 @@ class GlobalImageCache {
if (pageIndexSet.size < GlobalImageCache.NUM_PAGES_THRESHOLD) {
return false;
}
if (!this._imageCache.has(ref) && this._cacheLimitReached) {
if (!this._imageCache.has(ref) && this.#cacheLimitReached) {
return false;
}
return true;
}

addDecodeFailed(ref) {
this.#decodeFailedSet.put(ref);
}

hasDecodeFailed(ref) {
return this.#decodeFailedSet.has(ref);
}

/**
* PLEASE NOTE: Must be called *after* the `setData` method.
*/
Expand Down Expand Up @@ -265,7 +275,7 @@ class GlobalImageCache {
if (this._imageCache.has(ref)) {
return;
}
if (this._cacheLimitReached) {
if (this.#cacheLimitReached) {
warn("GlobalImageCache.setData - cache limit reached.");
return;
}
Expand All @@ -274,6 +284,7 @@ class GlobalImageCache {

clear(onlyData = false) {
if (!onlyData) {
this.#decodeFailedSet.clear();
this._refCache.clear();
}
this._imageCache.clear();
Expand Down
2 changes: 1 addition & 1 deletion src/display/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -2790,7 +2790,7 @@ class WorkerTransport {

for (const pageProxy of this.#pageCache.values()) {
for (const [, data] of pageProxy.objs) {
if (data.ref !== imageRef) {
if (data?.ref !== imageRef) {
continue;
}
if (!data.dataLen) {
Expand Down
1 change: 1 addition & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
!issue17679.pdf
!issue17679_2.pdf
!issue18030.pdf
!issue18042.pdf
!issue14953.pdf
!issue15367.pdf
!issue15372.pdf
Expand Down
Binary file added test/pdfs/issue18042.pdf
Binary file not shown.
59 changes: 59 additions & 0 deletions test/unit/api_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -4201,6 +4201,65 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
firstStatsOverall = null;
});

it("caches image resources at the document/page level, with corrupt images (issue 18042)", async function () {
const { NUM_PAGES_THRESHOLD } = GlobalImageCache;

const loadingTask = getDocument(buildGetDocumentParams("issue18042.pdf"));
const pdfDoc = await loadingTask.promise;
let checkedGlobalDecodeFailed = false;

for (let i = 1; i <= pdfDoc.numPages; i++) {
const pdfPage = await pdfDoc.getPage(i);
const viewport = pdfPage.getViewport({ scale: 1 });

const canvasAndCtx = CanvasFactory.create(
viewport.width,
viewport.height
);
const renderTask = pdfPage.render({
canvasContext: canvasAndCtx.context,
viewport,
});

await renderTask.promise;
const opList = renderTask.getOperatorList();
// The canvas is no longer necessary, since we only care about
// the image-data below.
CanvasFactory.destroy(canvasAndCtx);

const { commonObjs, objs } = pdfPage;
const imgIndex = opList.fnArray.indexOf(OPS.paintImageXObject);
const [objId] = opList.argsArray[imgIndex];

if (i < NUM_PAGES_THRESHOLD) {
expect(objId).toEqual(`img_p${i - 1}_1`);

expect(objs.has(objId)).toEqual(true);
expect(commonObjs.has(objId)).toEqual(false);
} else {
expect(objId).toEqual(
`g_${loadingTask.docId}_img_p${NUM_PAGES_THRESHOLD - 1}_1`
);

expect(objs.has(objId)).toEqual(false);
expect(commonObjs.has(objId)).toEqual(true);
}

// Ensure that the actual image data is identical for all pages.
const objsPool = i >= NUM_PAGES_THRESHOLD ? commonObjs : objs;
const imgData = objsPool.get(objId);

expect(imgData).toBe(null);

if (i === NUM_PAGES_THRESHOLD) {
checkedGlobalDecodeFailed = true;
}
}
expect(checkedGlobalDecodeFailed).toBeTruthy();

await loadingTask.destroy();
});

it("render for printing, with `printAnnotationStorage` set", async function () {
async function getPrintData(printAnnotationStorage = null) {
const canvasAndCtx = CanvasFactory.create(
Expand Down

0 comments on commit 6f821db

Please sign in to comment.