diff --git a/weasyprint/formatting_structure/build.py b/weasyprint/formatting_structure/build.py index d46a4ff3d..75ca503b4 100644 --- a/weasyprint/formatting_structure/build.py +++ b/weasyprint/formatting_structure/build.py @@ -244,6 +244,15 @@ def before_after_to_box(element, pseudo_type, state, style_for, target_collector, counter_style)) box.children = children + + # calculate the bookmark-label + if style['bookmark_label'] == 'none': + box.bookmark_label = '' + else: + _quote_depth, counter_values, _counter_scopes = state + compute_bookmark_label( + element, box, style['bookmark_label'], counter_values, + target_collector, counter_style) return [box] diff --git a/weasyprint/layout/__init__.py b/weasyprint/layout/__init__.py index 45457fd9f..e2bfe83da 100644 --- a/weasyprint/layout/__init__.py +++ b/weasyprint/layout/__init__.py @@ -140,10 +140,15 @@ def layout_document(html, root_box, context, max_loops=8): if not reloop_content and not reloop_pages: break - # Calculate string-sets and bookmark-label containing page based counters + # Calculate string-sets and bookmark-labels containing page based counters # when pagination is finished. No need to do that (maybe multiple times) in # make_page because they dont create boxes, only appear in MarginBoxes and # in the final PDF. + # Prevent repetition of bookmarks (see #1145). + + watch_elements = [] + watch_elements_before = [] + watch_elements_after = [] for i, page in enumerate(pages): # We need the updated page_counter_values resume_at, next_page, right_page, page_state, remake_state = ( @@ -151,12 +156,33 @@ def layout_document(html, root_box, context, max_loops=8): page_counter_values = page_state[1] for child in page.descendants(): + # Only one bookmark per original box + if child.bookmark_label: + if child.element_tag.endswith('::before'): + checklist = watch_elements_before + elif child.element_tag.endswith('::after'): + checklist = watch_elements_after + else: + checklist = watch_elements + if child.element in checklist: + child.bookmark_label = '' + else: + checklist.append(child.element) + # TODO: remove attribute or set a default value in Box class if hasattr(child, 'missing_link'): for (box, css_token), item in ( context.target_collector.counter_lookup_items.items()): if child.missing_link == box and css_token != 'content': + if (css_token == 'bookmark-label' and + not child.bookmark_label): + # don't refill it! + continue item.parse_again(page_counter_values) + # string_set is a pointer, but the bookmark_label is + # just a string: copy it + if css_token == 'bookmark-label': + child.bookmark_label = box.bookmark_label # Collect the string_sets in the LayoutContext string_sets = child.string_set if string_sets and string_sets != 'none': diff --git a/weasyprint/tests/test_pdf.py b/weasyprint/tests/test_pdf.py index 4305b372f..9b9b11eb6 100644 --- a/weasyprint/tests/test_pdf.py +++ b/weasyprint/tests/test_pdf.py @@ -331,6 +331,76 @@ def test_bookmarks_8(): assert o2.get_value('Title', '(.*)') == b'(g)' +@assert_no_logs +@requires('cairo', (1, 15, 4)) +def test_bookmarks_9(): + fileobj = io.BytesIO() + FakeHTML(string=''' +

a

+ ''').write_pdf(target=fileobj) + # h1 on page 1 + pdf_file = pdf.PDFFile(fileobj) + outlines = pdf_file.catalog.get_indirect_dict('Outlines', pdf_file) + assert outlines.get_type() == 'Outlines' + assert outlines.get_value('Count', '(.*)') == b'-1' + o1 = outlines.get_indirect_dict('First', pdf_file) + assert o1.get_value('Title', '(.*)') == b'(h1 on page 1)' + + +@assert_no_logs +@requires('cairo', (1, 15, 4)) +def test_bookmarks_10(): + fileobj = io.BytesIO() + FakeHTML(string=''' + +
a
+ ''').write_pdf(target=fileobj) + # x + # x + pdf_file = pdf.PDFFile(fileobj) + outlines = pdf_file.catalog.get_indirect_dict('Outlines', pdf_file) + assert outlines.get_type() == 'Outlines' + assert outlines.get_value('Count', '(.*)') == b'-2' + o1 = outlines.get_indirect_dict('First', pdf_file) + assert o1.get_value('Title', '(.*)') == b'(x)' + o2 = o1.get_indirect_dict('Next', pdf_file) + assert o2.get_value('Title', '(.*)') == b'(x)' + + +@assert_no_logs +@requires('cairo', (1, 15, 4)) +def test_bookmarks_11(): + fileobj = io.BytesIO() + FakeHTML(string=''' +
+ a + a + a +
+
+
b
+
c
+
+ ''').write_pdf(target=fileobj) + # a + # b + pdf_file = pdf.PDFFile(fileobj) + outlines = pdf_file.catalog.get_indirect_dict('Outlines', pdf_file) + assert outlines.get_type() == 'Outlines' + assert outlines.get_value('Count', '(.*)') == b'-2' + o1 = outlines.get_indirect_dict('First', pdf_file) + assert o1.get_value('Title', '(.*)') == b'(a)' + o2 = o1.get_indirect_dict('Next', pdf_file) + assert o2.get_value('Title', '(.*)') == b'(b)' + + @assert_no_logs def test_links_none(): fileobj = io.BytesIO()