Skip to content

Commit

Permalink
Include radio buttons when processing forms
Browse files Browse the repository at this point in the history
- 🌎 Currently, inputs are collected and rendered individually.
- ⛔ This means that groups of elements, such as radio buttons which may only have one value checked at once, don't have a way to identify other elements in the group.
- ✅ This commit keeps a concept of "forms" in between "page" and "inputs", and uses those forms to build groups of radio buttons.
  • Loading branch information
okkays committed May 1, 2024
1 parent d5d7ce3 commit 4500387
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 33 deletions.
13 changes: 12 additions & 1 deletion tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,15 @@ def test_partial_pdf_custom_metadata():
('<input>', ['/Tx', '/V ()']),
('<input value="">', ['/Tx', '/V ()']),
('<input type="checkbox">', ['/Btn']),
('<input type="radio">',
['/Btn', '/V /Off', '/AS /Off', f'/Ff {1 << (16 - 1)}']),
('<input checked type="radio" name="foo" value="Some Value">',
['/Btn', '/TU (foo)', '/V (Some Value)', '/AS (Some Value)']),
('<form><input type="radio" name="foo" value="v1"></form>'
'<form><input checked type="radio" name="foo" value="v1"></form>',
['/Btn', '/V (v1)',
'/AS (v1)', '/V (v1)',
'/AS /Off', '/V /Off']),
('<textarea></textarea>', ['/Tx', '/V ()']),
('<select><option value="a">A</option></select>', ['/Ch', '/Opt']),
('<select>'
Expand All @@ -525,7 +534,9 @@ def test_partial_pdf_custom_metadata():
def test_pdf_inputs(html, fields):
    """Check that PDF form fields are only emitted with ``--pdf-forms``.

    ``html`` is the markup fed to the command line on stdin and ``fields``
    is the list of strings that must all appear in the uncompressed PDF
    output when forms are enabled.
    """
    stdout = _run('--pdf-forms --uncompressed-pdf - -', html.encode())
    assert b'AcroForm' in stdout
    # Check fields one at a time so that a failure names the missing field
    # instead of reporting a bare ``all(...)`` as false.
    for field in fields:
        assert field.encode() in stdout, f'{field!r} not found in PDF'
    # Without the option, no interactive form must be written at all.
    stdout = _run('--uncompressed-pdf - -', html.encode())
    assert b'AcroForm' not in stdout

Expand Down
16 changes: 12 additions & 4 deletions weasyprint/anchors.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@ def rectangle_aabb(matrix, pos_x, pos_y, width, height):
return box_x1, box_y1, box_x2, box_y2


def gather_anchors(box, anchors, links, bookmarks, inputs, parent_matrix=None):
def gather_anchors(box, anchors, links, bookmarks, forms,
parent_matrix=None,
parent_form=None):
"""Gather anchors and other data related to specific positions in PDF.
Currently finds anchors, links, bookmarks and inputs.
Currently finds anchors, links, bookmarks and forms.
"""
# Get box transformation matrix.
Expand Down Expand Up @@ -89,6 +91,11 @@ def gather_anchors(box, anchors, links, bookmarks, inputs, parent_matrix=None):
has_anchor = anchor_name and anchor_name not in anchors
is_input = box.is_input()

if box.is_form():
parent_form = box.element
if parent_form not in forms:
forms[parent_form] = []

if has_bookmark or has_link or has_anchor or is_input:
if is_input:
pos_x, pos_y = box.content_box_x(), box.content_box_y()
Expand All @@ -106,7 +113,7 @@ def gather_anchors(box, anchors, links, bookmarks, inputs, parent_matrix=None):
link_type = 'attachment'
links.append((link_type, target, rectangle, box))
if is_input:
inputs.append((box.element, box.style, rectangle))
forms[parent_form].append((box.element, box.style, rectangle))
if matrix and (has_bookmark or has_anchor):
pos_x, pos_y = matrix.transform_point(pos_x, pos_y)
if has_bookmark:
Expand All @@ -116,7 +123,8 @@ def gather_anchors(box, anchors, links, bookmarks, inputs, parent_matrix=None):
anchors[anchor_name] = pos_x, pos_y

for child in box.all_children():
gather_anchors(child, anchors, links, bookmarks, inputs, matrix)
gather_anchors(child, anchors, links, bookmarks, forms, matrix,
parent_form=parent_form)


def make_page_bookmark_tree(page, skipped_levels, last_by_depth,
Expand Down
13 changes: 9 additions & 4 deletions weasyprint/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class Page:
instantiated directly.
"""

def __init__(self, page_box):
#: The page width, including margins, in CSS pixels.
self.width = page_box.margin_width()
Expand Down Expand Up @@ -67,14 +68,16 @@ def __init__(self, page_box):
#: ``(x, y)`` point in CSS pixels from the top-left of the page.
self.anchors = {}

#: The :obj:`list` of ``(element, attributes, rectangle)`` :obj:`tuples
#: <tuple>`. A ``rectangle`` is ``(x, y, width, height)``, in CSS
#: The :obj:`dict` mapping form elements to a list
#: of ``(element, attributes, rectangle)`` :obj:`tuples <tuple>`.
#: A ``rectangle`` is ``(x, y, width, height)``, in CSS
#: pixels from the top-left of the page. ``attributes`` is a
#: :obj:`dict` of HTML tag attributes and values.
self.inputs = []
#: The key ``None`` will contain inputs that are not part of a form.
self.forms = {None: []}

gather_anchors(
page_box, self.anchors, self.links, self.bookmarks, self.inputs)
page_box, self.anchors, self.links, self.bookmarks, self.forms)
self._page_box = page_box

def paint(self, stream, scale=1):
Expand Down Expand Up @@ -105,6 +108,7 @@ class DocumentMetadata:
New attributes may be added in future versions of WeasyPrint.
"""

def __init__(self, title=None, authors=None, description=None,
keywords=None, generator=None, created=None, modified=None,
attachments=None, lang=None, custom=None):
Expand Down Expand Up @@ -162,6 +166,7 @@ class DiskCache:
(i.e. RasterImage instances) are still stored in memory.
"""

def __init__(self, folder):
self._path = Path(folder)
self._path.mkdir(parents=True, exist_ok=True)
Expand Down
6 changes: 6 additions & 0 deletions weasyprint/formatting_structure/boxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,12 @@ def is_input(self):
return not isinstance(self, (LineBox, TextBox))
return False

def is_form(self):
    """Return whether this box is a form element.

    A box whose ``element`` is ``None`` is never a form.
    """
    if self.element is None:
        return False
    # Compare for equality: ``tag in 'form'`` is a substring test, which
    # is true for tags such as ``'f'``, ``'or'`` or ``''`` and raises
    # ``TypeError`` when the tag is not a string.
    return self.element.tag == 'form'


class ParentBox(Box):
"""A box that has children."""
Expand Down
6 changes: 3 additions & 3 deletions weasyprint/pdf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from .stream import Stream

from .anchors import ( # isort:skip
add_annotations, add_inputs, add_links, add_outlines, resolve_links,
add_annotations, add_forms, add_links, add_outlines, resolve_links,
write_pdf_attachment)

VARIANTS = {
Expand Down Expand Up @@ -184,8 +184,8 @@ def generate_pdf(document, target, zoom, **options):
add_annotations(
links_and_anchors[0], matrix, document, pdf, pdf_page, annot_files,
compress)
add_inputs(
page.inputs, matrix, pdf, pdf_page, resources, stream,
add_forms(
page.forms, matrix, pdf, pdf_page, resources, stream,
document.font_config.font_map, compress)
page.paint(stream, scale)

Expand Down
94 changes: 73 additions & 21 deletions weasyprint/pdf/anchors.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Insert anchors, links, bookmarks and inputs in PDFs."""

import collections
import io
import mimetypes
from hashlib import md5
Expand Down Expand Up @@ -91,10 +92,31 @@ def add_outlines(pdf, bookmarks, parent=None):
return outlines, count


def add_inputs(inputs, matrix, pdf, page, resources, stream, font_map,
compress):
def _make_checked_stream(resources, width, height, compress, style, font_size):
    """Build the form XObject stream drawn for a checked button widget.

    The stream covers a ``width`` x ``height`` bounding box and shows the
    ``'4'`` glyph of the ``ZaDb`` font at ``font_size``, painted with the
    RGB components of ``style['color']``.

    The stream is returned as-is: this function does not add it to any PDF.
    """
    xobject_extra = {
        'Resources': resources.reference,
        'Type': '/XObject',
        'Subtype': '/Form',
        'BBox': pydyf.Array((0, 0, width, height)),
    }
    checked_stream = pydyf.Stream(extra=xobject_extra, compress=compress)

    # Center the mark in the box, assuming the Dingbats check glyph is
    # about 0.8em wide and high.
    offset_x = (width - font_size * 0.8) / 2
    offset_y = (height - font_size * 0.8) / 2

    # Keep the graphics state changes local to this appearance stream.
    checked_stream.push_state()
    checked_stream.begin_text()
    checked_stream.set_color_rgb(*style['color'][:3])
    checked_stream.set_font_size('ZaDb', font_size)
    checked_stream.move_text_to(offset_x, offset_y)
    checked_stream.show_text_string('4')
    checked_stream.end_text()
    checked_stream.pop_state()
    return checked_stream


def add_forms(forms, matrix, pdf, page, resources, stream, font_map,
compress):
"""Include form inputs in PDF."""
if not inputs:
if not forms or not any(forms.values()):
return

if 'Annots' not in page:
Expand All @@ -109,12 +131,19 @@ def add_inputs(inputs, matrix, pdf, page, resources, stream, font_map,
context = ffi.gc(
pango.pango_font_map_create_context(font_map),
gobject.g_object_unref)
for i, (element, style, rectangle) in enumerate(inputs):
inputs_with_forms = [
(form, element, style, rectangle)
for form, inputs in forms.items()
for element, style, rectangle in inputs
]
radio_groups = collections.defaultdict(dict)
for i, (form, element, style, rectangle) in enumerate(inputs_with_forms):
rectangle = (
*matrix.transform_point(*rectangle[:2]),
*matrix.transform_point(*rectangle[2:]))

input_type = element.attrib.get('type')
input_value = element.attrib.get('value', 'Yes')
default_name = f'unknown-{page_reference.decode()}-{i}'
input_name = element.attrib.get('name', default_name)
# TODO: where does this 0.75 scale come from?
Expand All @@ -125,23 +154,9 @@ def add_inputs(inputs, matrix, pdf, page, resources, stream, font_map,
# Checkboxes
width = rectangle[2] - rectangle[0]
height = rectangle[1] - rectangle[3]
checked_stream = pydyf.Stream(extra={
'Resources': resources.reference,
'Type': '/XObject',
'Subtype': '/Form',
'BBox': pydyf.Array((0, 0, width, height)),
}, compress=compress)
checked_stream.push_state()
checked_stream.begin_text()
checked_stream.set_color_rgb(*style['color'][:3])
checked_stream.set_font_size('ZaDb', font_size)
# Center (let’s assume that Dingbat’s check has a 0.8em size)
x = (width - font_size * 0.8) / 2
y = (height - font_size * 0.8) / 2
checked_stream.move_text_to(x, y)
checked_stream.show_text_string('4')
checked_stream.end_text()
checked_stream.pop_state()
checked_stream = _make_checked_stream(resources,
width, height,
compress, style, font_size)
pdf.add_object(checked_stream)

checked = 'checked' in element.attrib
Expand All @@ -161,6 +176,43 @@ def add_inputs(inputs, matrix, pdf, page, resources, stream, font_map,
'AS': '/Yes' if checked else '/Off',
'DA': pydyf.String(b' '.join(field_stream.stream)),
})
elif input_type == 'radio':
if input_name not in radio_groups[form]:
new_group = pydyf.Dictionary({
'Type': '/Annot',
'Subtype': '/Widget',
'FT': '/Btn',
'Ff': 1 << (16 - 1), # Radio flag
'F': 1 << (3 - 1), # Print flag
'P': page.reference,
'T': pydyf.String(f'{hash(form)}-{input_name}'),
'TU': pydyf.String(input_name),
'V': '/Off',
'Kids': pydyf.Array(),
})
pdf.add_object(new_group)
page['Annots'].append(new_group.reference)
radio_groups[form][input_name] = new_group
group = radio_groups[form][input_name]
width = rectangle[2] - rectangle[0]
height = rectangle[1] - rectangle[3]
on_stream = _make_checked_stream(resources,
width, height,
compress, style, font_size)
checked = 'checked' in element.attrib
field = pydyf.Dictionary({
'Type': '/Annot',
'Subtype': '/Widget',
'Rect': pydyf.Array(rectangle),
'Parent': group.reference,
'AS': pydyf.String(input_value) if checked else '/Off',
'AP': pydyf.Dictionary({'N': pydyf.Dictionary({
pydyf.String(input_value): on_stream.reference,
})}),
})
if checked:
group['V'] = pydyf.String(input_value)
group['Kids'].append(field.reference)
elif element.tag == 'select':
# Select fields
font_description = get_font_description(style)
Expand Down

0 comments on commit 4500387

Please sign in to comment.