Skip to content

Commit

Permalink
Allow inserting several annotations under the same parent in the structure tree
Browse files Browse the repository at this point in the history

While testing stamp insertion with the added PDF, I noticed that tags using an MCID
weren't considered when trying to attach an annotation to them.
  • Loading branch information
calixteman committed Apr 24, 2024
1 parent dafc4f6 commit 45fa867
Show file tree
Hide file tree
Showing 6 changed files with 242 additions and 111 deletions.
6 changes: 6 additions & 0 deletions src/core/primitives.js
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,12 @@ class RefSetCache {
/** Empties the underlying `_map`, dropping every cached entry. */
clear() {
this._map.clear();
}

/**
 * Generator over every entry currently held in `_map`.
 *
 * Each map key is passed through `Ref.fromString` before being yielded, so
 * consumers receive `[ref, value]` pairs rather than the raw map keys.
 */
*items() {
for (const [ref, value] of this._map) {
// NOTE(review): presumably the key is the string form of a Ref, as written
// by put() — confirm against the rest of the class.
yield [Ref.fromString(ref), value];
}
}
}

function isName(v, name) {
Expand Down
208 changes: 98 additions & 110 deletions src/core/struct_tree.js
Original file line number Diff line number Diff line change
Expand Up @@ -119,19 +119,19 @@ class StructTreeRoot {
newRefs,
}) {
const root = pdfManager.catalog.cloneDict();
const cache = new RefSetCache();
cache.put(catalogRef, root);

const structTreeRootRef = xref.getNewTemporaryRef();
root.set("StructTreeRoot", structTreeRootRef);

const buffer = [];
await writeObject(catalogRef, root, buffer, xref);
newRefs.push({ ref: catalogRef, data: buffer.join("") });

const structTreeRoot = new Dict(xref);
structTreeRoot.set("Type", Name.get("StructTreeRoot"));
const parentTreeRef = xref.getNewTemporaryRef();
structTreeRoot.set("ParentTree", parentTreeRef);
const kids = [];
structTreeRoot.set("K", kids);
cache.put(structTreeRootRef, structTreeRoot);

const parentTree = new Dict(xref);
const nums = [];
Expand All @@ -144,18 +144,18 @@ class StructTreeRoot {
nums,
xref,
pdfManager,
newRefs,
buffer,
cache,
});
structTreeRoot.set("ParentTreeNextKey", nextKey);

buffer.length = 0;
await writeObject(parentTreeRef, parentTree, buffer, xref);
newRefs.push({ ref: parentTreeRef, data: buffer.join("") });
cache.put(parentTreeRef, parentTree);

buffer.length = 0;
await writeObject(structTreeRootRef, structTreeRoot, buffer, xref);
newRefs.push({ ref: structTreeRootRef, data: buffer.join("") });
const buffer = [];
for (const [ref, obj] of cache.items()) {
buffer.length = 0;
await writeObject(ref, obj, buffer, xref);
newRefs.push({ ref, data: buffer.join("") });
}
}

async canUpdateStructTree({ pdfManager, xref, newAnnotationsByPage }) {
Expand Down Expand Up @@ -232,6 +232,8 @@ class StructTreeRoot {
const xref = this.dict.xref;
const structTreeRoot = this.dict.clone();
const structTreeRootRef = this.ref;
const cache = new RefSetCache();
cache.put(structTreeRootRef, structTreeRoot);

let parentTreeRef = structTreeRoot.getRaw("ParentTree");
let parentTree;
Expand All @@ -243,6 +245,7 @@ class StructTreeRoot {
structTreeRoot.set("ParentTree", parentTreeRef);
}
parentTree = parentTree.clone();
cache.put(parentTreeRef, parentTree);

let nums = parentTree.getRaw("Nums");
let numsRef = null;
Expand All @@ -255,47 +258,27 @@ class StructTreeRoot {
parentTree.set("Nums", nums);
}

let kids = structTreeRoot.getRaw("K");
let kidsRef = null;
if (kids instanceof Ref) {
kidsRef = kids;
kids = xref.fetch(kidsRef);
} else {
kidsRef = xref.getNewTemporaryRef();
structTreeRoot.set("K", kidsRef);
}
kids = Array.isArray(kids) ? kids.slice() : [kids];

const buffer = [];
const newNextkey = await StructTreeRoot.#writeKids({
newAnnotationsByPage,
structTreeRootRef,
kids,
kids: null,
nums,
xref,
pdfManager,
newRefs,
buffer,
cache,
});
structTreeRoot.set("ParentTreeNextKey", newNextkey);

buffer.length = 0;
await writeObject(kidsRef, kids, buffer, xref);
newRefs.push({ ref: kidsRef, data: buffer.join("") });

if (numsRef) {
buffer.length = 0;
await writeObject(numsRef, nums, buffer, xref);
newRefs.push({ ref: numsRef, data: buffer.join("") });
cache.put(numsRef, nums);
}

buffer.length = 0;
await writeObject(parentTreeRef, parentTree, buffer, xref);
newRefs.push({ ref: parentTreeRef, data: buffer.join("") });

buffer.length = 0;
await writeObject(structTreeRootRef, structTreeRoot, buffer, xref);
newRefs.push({ ref: structTreeRootRef, data: buffer.join("") });
const buffer = [];
for (const [ref, obj] of cache.items()) {
buffer.length = 0;
await writeObject(ref, obj, buffer, xref);
newRefs.push({ ref, data: buffer.join("") });
}
}

static async #writeKids({
Expand All @@ -305,8 +288,7 @@ class StructTreeRoot {
nums,
xref,
pdfManager,
newRefs,
buffer,
cache,
}) {
const objr = Name.get("OBJR");
let nextKey = -Infinity;
Expand Down Expand Up @@ -349,19 +331,15 @@ class StructTreeRoot {
tagDict.set("ActualText", actualText);
}

if (structTreeParent) {
await this.#updateParentTag({
structTreeParent,
tagDict,
newTagRef: tagRef,
fallbackRef: structTreeRootRef,
xref,
newRefs,
buffer,
});
} else {
tagDict.set("P", structTreeRootRef);
}
await this.#updateParentTag({
structTreeParent,
tagDict,
newTagRef: tagRef,
structTreeRootRef,
fallbackKids: kids,
xref,
cache,
});

const objDict = new Dict(xref);
tagDict.set("K", objDict);
Expand All @@ -372,23 +350,24 @@ class StructTreeRoot {
}
objDict.set("Obj", ref);

buffer.length = 0;
await writeObject(tagRef, tagDict, buffer, xref);
newRefs.push({ ref: tagRef, data: buffer.join("") });

cache.put(tagRef, tagDict);
nums.push(parentTreeId, tagRef);
kids.push(tagRef);
}
}
return nextKey + 1;
}

static #collectParents({ elements, xref, pageDict, numberTree }) {
const idToElement = new Map();
const idToElements = new Map();
for (const element of elements) {
if (element.structTreeParentId) {
const id = parseInt(element.structTreeParentId.split("_mc")[1], 10);
idToElement.set(id, element);
let elems = idToElements.get(id);
if (!elems) {
elems = [];
idToElements.set(id, elems);
}
elems.push(element);
}
}

Expand All @@ -400,13 +379,16 @@ class StructTreeRoot {
const parentArray = numberTree.get(id);

const updateElement = (kid, pageKid, kidRef) => {
const element = idToElement.get(kid);
if (element) {
const elems = idToElements.get(kid);
if (elems) {
const parentRef = pageKid.getRaw("P");
const parentDict = xref.fetchIfRef(parentRef);
if (parentRef instanceof Ref && parentDict instanceof Dict) {
// It should always be the case, but we check just in case.
element.structTreeParent = { ref: kidRef, dict: pageKid };
const params = { ref: kidRef, dict: pageKid };
for (const element of elems) {
element.structTreeParent = params;
}
}
return true;
}
Expand All @@ -431,67 +413,73 @@ class StructTreeRoot {
if (Number.isInteger(kid) && updateElement(kid, pageKid, kidRef)) {
break;
}
if (!(kid instanceof Dict)) {
continue;
}
if (!isName(kid.get("Type"), "MCR")) {
break;
}
const mcid = kid.get("MCID");
if (Number.isInteger(mcid) && updateElement(mcid, pageKid, kidRef)) {
break;
}
}
}
}

static async #updateParentTag({
structTreeParent: { ref, dict },
structTreeParent,
tagDict,
newTagRef,
fallbackRef,
structTreeRootRef,
fallbackKids,
xref,
newRefs,
buffer,
cache,
}) {
// We get the parent of the tag.
const parentRef = dict.getRaw("P");
let parentDict = xref.fetchIfRef(parentRef);

tagDict.set("P", parentRef);
let ref = null;
let parentRef;
if (structTreeParent) {
({ ref } = structTreeParent);

// We get the kids in order to insert a new tag at the right position.
let saveParentDict = false;
let parentKids;
let parentKidsRef = parentDict.getRaw("K");
if (!(parentKidsRef instanceof Ref)) {
parentKids = parentKidsRef;
parentKidsRef = xref.getNewTemporaryRef();
parentDict = parentDict.clone();
parentDict.set("K", parentKidsRef);
saveParentDict = true;
// We get the parent of the tag.
parentRef = structTreeParent.dict.getRaw("P") || structTreeRootRef;
} else {
parentKids = xref.fetch(parentKidsRef);
}

if (Array.isArray(parentKids)) {
const index = parentKids.indexOf(ref);
if (index >= 0) {
parentKids = parentKids.slice();
parentKids.splice(index + 1, 0, newTagRef);
} else {
warn("Cannot update the struct tree: parent kid not found.");
tagDict.set("P", fallbackRef);
return;
}
} else if (parentKids instanceof Dict) {
parentKids = [parentKidsRef, newTagRef];
parentKidsRef = xref.getNewTemporaryRef();
parentDict.set("K", parentKidsRef);
saveParentDict = true;
parentRef = structTreeRootRef;
}

buffer.length = 0;
await writeObject(parentKidsRef, parentKids, buffer, xref);
newRefs.push({ ref: parentKidsRef, data: buffer.join("") });
tagDict.set("P", parentRef);

if (!saveParentDict) {
// We get the kids in order to insert a new tag at the right position.
const parentDict = xref.fetchIfRef(parentRef);
if (!parentDict) {
fallbackKids.push(newTagRef);
return;
}

buffer.length = 0;
await writeObject(parentRef, parentDict, buffer, xref);
newRefs.push({ ref: parentRef, data: buffer.join("") });
let cachedParentDict = cache.get(parentRef);
if (!cachedParentDict) {
cachedParentDict = parentDict.clone();
cache.put(parentRef, cachedParentDict);
}
const parentKidsRaw = cachedParentDict.getRaw("K");
let cachedParentKids =
parentKidsRaw instanceof Ref ? cache.get(parentKidsRaw) : null;
if (!cachedParentKids) {
cachedParentKids = xref.fetchIfRef(parentKidsRaw);
cachedParentKids = Array.isArray(cachedParentKids)
? cachedParentKids.slice()
: [parentKidsRaw];
const parentKidsRef = xref.getNewTemporaryRef();
cachedParentDict.set("K", parentKidsRef);
cache.put(parentKidsRef, cachedParentKids);
}

const index = cachedParentKids.indexOf(ref);
cachedParentKids.splice(
index >= 0 ? index + 1 : cachedParentKids.length,
0,
newTagRef
);
}
}

Expand Down
1 change: 1 addition & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -645,3 +645,4 @@
!issue12213.pdf
!tracemonkey_freetext.pdf
!issue17998.pdf
!pdfjs_wikipedia.pdf
Binary file added test/pdfs/pdfjs_wikipedia.pdf
Binary file not shown.
Loading

0 comments on commit 45fa867

Please sign in to comment.