Skip to content
This repository was archived by the owner on Feb 25, 2025. It is now read-only.

Commit 473c9f4

Browse files
minorninth authored and Skia Commit-Bot committed
Tagged PDF annotations must appear in the Parent Tree as well.
In order for a tagged PDF to validate, each annotation (such as a hyperlink) must have a /StructParent entry that links back to the Parent Tree object, which in turn links that annotation to its corresponding struct tree node.

The parent tree also contains one entry per page. Both the page entries and the annotation entries need IDs. Since we don't know in advance how many pages will be in the doc at the time we start processing annotations, we start the annotation IDs with a large number (100,000) which effectively serves as the maximum number of pages in a document that we can handle.

Bug: chromium:1100712
Change-Id: I5df84c4249ed6a4d21222cfc86b2c0c9b17d6efb
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/300254
Auto-Submit: Dominic Mazzoni <[email protected]>
Commit-Queue: Ben Wagner <[email protected]>
Reviewed-by: Ben Wagner <[email protected]>
1 parent 1436d72 commit 473c9f4

File tree

5 files changed

+99
-17
lines changed

5 files changed

+99
-17
lines changed

src/pdf/SkPDFDevice.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ class ScopedOutputMarkedContentTags {
7575
: fOut(out)
7676
, fMarkId(-1) {
7777
if (nodeId) {
78-
fMarkId = document->getMarkIdForNodeId(nodeId);
78+
fMarkId = document->createMarkIdForNodeId(nodeId);
7979
}
8080

8181
if (fMarkId != -1) {

src/pdf/SkPDFDocument.cpp

+14-3
Original file line numberDiff line numberDiff line change
@@ -334,10 +334,17 @@ std::unique_ptr<SkPDFArray> SkPDFDocument::getAnnotations() {
334334
SkDEBUGFAIL("Unknown link type.");
335335
}
336336

337+
if (link->fNodeId) {
338+
int structParentKey = createStructParentKeyForNodeId(link->fNodeId);
339+
if (structParentKey != -1) {
340+
annotation.insertInt("StructParent", structParentKey);
341+
}
342+
}
343+
337344
SkPDFIndirectReference annotationRef = emit(annotation);
338345
array->appendRef(annotationRef);
339346
if (link->fNodeId) {
340-
fTagTree.addNodeAnnotation(link->fNodeId, annotationRef);
347+
fTagTree.addNodeAnnotation(link->fNodeId, annotationRef, SkToUInt(this->currentPageIndex()));
341348
}
342349
}
343350
return array;
@@ -519,8 +526,12 @@ const SkMatrix& SkPDFDocument::currentPageTransform() const {
519526
return fPageDevice->initialTransform();
520527
}
521528

522-
int SkPDFDocument::getMarkIdForNodeId(int nodeId) {
523-
return fTagTree.getMarkIdForNodeId(nodeId, SkToUInt(this->currentPageIndex()));
529+
int SkPDFDocument::createMarkIdForNodeId(int nodeId) {
530+
return fTagTree.createMarkIdForNodeId(nodeId, SkToUInt(this->currentPageIndex()));
531+
}
532+
533+
int SkPDFDocument::createStructParentKeyForNodeId(int nodeId) {
534+
return fTagTree.createStructParentKeyForNodeId(nodeId, SkToUInt(this->currentPageIndex()));
524535
}
525536

526537
static std::vector<const SkPDFFont*> get_fonts(const SkPDFDocument& canon) {

src/pdf/SkPDFDocumentPriv.h

+8-2
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,14 @@ class SkPDFDocument : public SkDocument {
116116
SkPDFIndirectReference currentPage() const {
117117
return SkASSERT(!fPageRefs.empty()), fPageRefs.back();
118118
}
119-
// Returns -1 if no mark ID.
120-
int getMarkIdForNodeId(int nodeId);
119+
// Used to allow marked content to refer to its corresponding structure
120+
// tree node, via a page entry in the parent tree. Returns -1 if no
121+
// mark ID.
122+
int createMarkIdForNodeId(int nodeId);
123+
// Used to allow annotations to refer to their corresponding structure
124+
// tree node, via the struct parent tree. Returns -1 if no struct parent
125+
// key.
126+
int createStructParentKeyForNodeId(int nodeId);
121127

122128
std::unique_ptr<SkPDFArray> getAnnotations();
123129

src/pdf/SkPDFTag.cpp

+65-9
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,18 @@ static const char* tag_name_from_type(SkPDF::DocumentStructureType type) {
6666
SK_ABORT("bad tag");
6767
}
6868

69+
// The struct parent tree consists of one entry per page, followed by
70+
// entries for individual struct tree nodes corresponding to
71+
// annotations. Each entry is a key/value pair with an integer key
72+
// and an indirect reference value.
73+
//
74+
// The page entries get consecutive keys starting at 0. Since we don't
75+
// know the total number of pages in the document at the time we start
76+
// processing annotations, start the key for annotations with a large
77+
// number, which effectively becomes the maximum number of pages in a
78+
// PDF we can handle.
79+
const int kFirstAnnotationStructParentKey = 100000;
80+
6981
struct SkPDFTagNode {
7082
// Structure element nodes need a unique alphanumeric ID,
7183
// and we need to be able to output them sorted in lexicographic
@@ -97,7 +109,11 @@ struct SkPDFTagNode {
97109
kNo,
98110
} fCanDiscard = kUnknown;
99111
std::unique_ptr<SkPDFArray> fAttributes;
100-
std::vector<SkPDFIndirectReference> fAnnotations;
112+
struct AnnotationInfo {
113+
unsigned fPageIndex;
114+
SkPDFIndirectReference fAnnotationRef;
115+
};
116+
std::vector<AnnotationInfo> fAnnotations;
101117
};
102118

103119
SkPDF::AttributeList::AttributeList() = default;
@@ -238,7 +254,7 @@ void SkPDFTagTree::reset() {
238254
fRoot = nullptr;
239255
}
240256

241-
int SkPDFTagTree::getMarkIdForNodeId(int nodeId, unsigned pageIndex) {
257+
int SkPDFTagTree::createMarkIdForNodeId(int nodeId, unsigned pageIndex) {
242258
if (!fRoot) {
243259
return -1;
244260
}
@@ -258,6 +274,25 @@ int SkPDFTagTree::getMarkIdForNodeId(int nodeId, unsigned pageIndex) {
258274
return markId;
259275
}
260276

277+
int SkPDFTagTree::createStructParentKeyForNodeId(int nodeId, unsigned pageIndex) {
278+
if (!fRoot) {
279+
return -1;
280+
}
281+
SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
282+
if (!tagPtr) {
283+
return -1;
284+
}
285+
SkPDFTagNode* tag = *tagPtr;
286+
SkASSERT(tag);
287+
288+
tag->fCanDiscard = SkPDFTagNode::kNo;
289+
290+
int nextStructParentKey = kFirstAnnotationStructParentKey +
291+
static_cast<int>(fParentTreeAnnotationNodeIds.size());
292+
fParentTreeAnnotationNodeIds.push_back(nodeId);
293+
return nextStructParentKey;
294+
}
295+
261296
static bool can_discard(SkPDFTagNode* node) {
262297
if (node->fCanDiscard == SkPDFTagNode::kYes) {
263298
return true;
@@ -298,9 +333,10 @@ SkPDFIndirectReference SkPDFTagTree::PrepareTagTreeToEmit(SkPDFIndirectReference
298333
mcr->insertInt("MCID", info.fMarkId);
299334
kids->appendObject(std::move(mcr));
300335
}
301-
for (SkPDFIndirectReference annotationRef : node->fAnnotations) {
336+
for (const SkPDFTagNode::AnnotationInfo& annotationInfo : node->fAnnotations) {
302337
std::unique_ptr<SkPDFDict> annotationDict = SkPDFMakeDict("OBJR");
303-
annotationDict->insertRef("Obj", annotationRef);
338+
annotationDict->insertRef("Obj", annotationInfo.fAnnotationRef);
339+
annotationDict->insertRef("Pg", doc->getPage(annotationInfo.fPageIndex));
304340
kids->appendObject(std::move(annotationDict));
305341
}
306342
node->fRef = ref;
@@ -333,7 +369,7 @@ SkPDFIndirectReference SkPDFTagTree::PrepareTagTreeToEmit(SkPDFIndirectReference
333369
return doc->emit(dict, ref);
334370
}
335371

336-
void SkPDFTagTree::addNodeAnnotation(int nodeId, SkPDFIndirectReference annotationRef) {
372+
void SkPDFTagTree::addNodeAnnotation(int nodeId, SkPDFIndirectReference annotationRef, unsigned pageIndex) {
337373
if (!fRoot) {
338374
return;
339375
}
@@ -343,9 +379,10 @@ void SkPDFTagTree::addNodeAnnotation(int nodeId, SkPDFIndirectReference annotati
343379
}
344380
SkPDFTagNode* tag = *tagPtr;
345381
SkASSERT(tag);
346-
tag->fAnnotations.push_back(annotationRef);
347-
}
348382

383+
SkPDFTagNode::AnnotationInfo annotationInfo = {pageIndex, annotationRef};
384+
tag->fAnnotations.push_back(annotationInfo);
385+
}
349386

350387
SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
351388
if (!fRoot) {
@@ -363,11 +400,15 @@ SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
363400
structTreeRoot.insertRef("K", PrepareTagTreeToEmit(ref, fRoot, doc));
364401
structTreeRoot.insertInt("ParentTreeNextKey", SkToInt(pageCount));
365402

366-
// Build the parent tree, which is a mapping from the marked
367-
// content IDs on each page to their corressponding tags.
403+
// Build the parent tree, which consists of two things:
404+
// (1) For each page, a mapping from the marked content IDs on
405+
// each page to their corresponding tags
406+
// (2) For each annotation, an indirect reference to that
407+
// annotation's struct tree element.
368408
SkPDFDict parentTree("ParentTree");
369409
auto parentTreeNums = SkPDFMakeArray();
370410

411+
// First, one entry per page.
371412
SkASSERT(fMarksPerPage.size() <= pageCount);
372413
for (size_t j = 0; j < fMarksPerPage.size(); ++j) {
373414
const SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[j];
@@ -379,6 +420,21 @@ SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
379420
parentTreeNums->appendInt(j);
380421
parentTreeNums->appendRef(doc->emit(markToTagArray));
381422
}
423+
424+
// Then, one entry per annotation.
425+
for (size_t j = 0; j < fParentTreeAnnotationNodeIds.size(); ++j) {
426+
int nodeId = fParentTreeAnnotationNodeIds[j];
427+
int structParentKey = kFirstAnnotationStructParentKey + static_cast<int>(j);
428+
429+
SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
430+
if (!tagPtr) {
431+
continue;
432+
}
433+
SkPDFTagNode* tag = *tagPtr;
434+
parentTreeNums->appendInt(structParentKey);
435+
parentTreeNums->appendRef(tag->fRef);
436+
}
437+
382438
parentTree.insertObject("Nums", std::move(parentTreeNums));
383439
structTreeRoot.insertRef("ParentTree", doc->emit(parentTree));
384440

src/pdf/SkPDFTag.h

+11-2
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,16 @@ class SkPDFTagTree {
2323
~SkPDFTagTree();
2424
void init(SkPDF::StructureElementNode*);
2525
void reset();
26-
int getMarkIdForNodeId(int nodeId, unsigned pageIndex);
27-
void addNodeAnnotation(int nodeId, SkPDFIndirectReference annotationRef);
26+
// Used to allow marked content to refer to its corresponding structure
27+
// tree node, via a page entry in the parent tree. Returns -1 if no
28+
// mark ID.
29+
int createMarkIdForNodeId(int nodeId, unsigned pageIndex);
30+
// Used to allow annotations to refer to their corresponding structure
31+
// tree node, via the struct parent tree. Returns -1 if no struct parent
32+
// key.
33+
int createStructParentKeyForNodeId(int nodeId, unsigned pageIndex);
34+
35+
void addNodeAnnotation(int nodeId, SkPDFIndirectReference annotationRef, unsigned pageIndex);
2836
SkPDFIndirectReference makeStructTreeRoot(SkPDFDocument* doc);
2937

3038
private:
@@ -48,6 +56,7 @@ class SkPDFTagTree {
4856
SkPDFTagNode* fRoot = nullptr;
4957
SkTArray<SkTArray<SkPDFTagNode*>> fMarksPerPage;
5058
std::vector<IDTreeEntry> fIdTreeEntries;
59+
std::vector<int> fParentTreeAnnotationNodeIds;
5160

5261
SkPDFTagTree(const SkPDFTagTree&) = delete;
5362
SkPDFTagTree& operator=(const SkPDFTagTree&) = delete;

0 commit comments

Comments
 (0)