diff --git a/docs/common_use_cases.rst b/docs/common_use_cases.rst
index 8d506826f..59962b932 100644
--- a/docs/common_use_cases.rst
+++ b/docs/common_use_cases.rst
@@ -102,8 +102,8 @@ such as page numbers, headers, etc. Read more about the page_ at-rule.
.. _page: https://developer.mozilla.org/en-US/docs/Web/CSS/@page
-Generate PDFs Specialized for Accessibility (PDF/UA) and Archiving (PDF/A)
---------------------------------------------------------------------------
+Generate Specialized PDFs
+-------------------------
WeasyPrint can generate different PDF variants, including PDF/UA and PDF/A. The
feature is available by using the ``--pdf-variant`` CLI option, or the
@@ -125,8 +125,8 @@ Even if WeasyPrint tries to generate valid documents, the result is not
guaranteed: the HTML, CSS and PDF features chosen by the user must follow the
limitations defined by the different specifications.
-PDF/A
-.....
+PDF/A (Archiving)
+.................
PDF/A documents are specialized for archiving purposes. They are a simple
subset of PDF, with a lot of limitations: no audio, video or JavaScript,
@@ -145,8 +145,8 @@ valid PDF identifier, but you can provide your own with the
If your document includes images, you must set the ``image-rendering:
crisp-edges`` property to avoid anti-aliasing, that is forbidden by PDF/A.
-PDF/UA
-......
+PDF/UA (Universal Accessibility)
+................................
PDF/UA documents are specialized for accessibility purposes. They include extra
metadata that define document information and content structure.
@@ -158,6 +158,179 @@ also used to define the order of the PDF content.
Some information is required in your HTML file, including a ``<title>`` tag,
and a ``lang`` attribute set on the ``<html>`` tag.
+Factur-X / ZUGFeRD (Electronic Invoices)
+........................................
+
+Factur-X / ZUGFeRD is a Franco-German standard for hybrid e-invoices, the first
+implementation of the European Semantic Standard EN 16931. It enables users to
+include normalized metadata in PDF invoices, such as company information or
+invoice amounts, so that compatible software can automatically read this
+information. This standard is based on PDF/A-3b.
+
+WeasyPrint can generate Factur-X / ZUGFeRD documents. Invoice metadata must be
+generated by the user and included in the PDF document when rendered. Two
+different metadata files are required:
+
+- the first one is RDF metadata, containing document metadata and PDF/A
+ extension information;
+- the second one is Factur-X / ZUGFeRD metadata, containing invoice amounts,
+ plus seller and buyer information.
+
+Here is an example of Factur-X document generation.
+
+``rdf.xml``:
+
+.. code-block:: xml
+
+
+
+
+
+ MINIMUM
+ factur-x.xml
+ INVOICE
+ 1.0
+
+
+
+
+
+ Factur-X PDFA Extension Schema
+ urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#
+ fx
+
+
+
+ DocumentFileName
+ Text
+ external
+ name of the embedded XML invoice file
+
+
+ DocumentType
+ Text
+ external
+ INVOICE
+
+
+ Version
+ Text
+ external
+ The actual version of the Factur-X XML schema
+
+
+ ConformanceLevel
+ Text
+ external
+ The conformance level of the embedded Factur-X data
+
+
+
+
+
+
+
+
+
+
+``factur-x.xml``:
+
+.. code-block:: xml
+
+
+
+
+ A1
+
+
+ urn:factur-x.eu:1p0:minimum
+
+
+
+ 123
+ 380
+
+ 20200131
+
+
+
+
+ Buyer
+
+ Supplier Corp
+
+ 123456782
+
+
+ FR
+
+
+ FR11123456782
+
+
+
+ Buyer Corp
+
+ 987654324
+
+
+
+ 456
+
+
+
+
+ EUR
+
+ 100.00
+ 20.00
+ 120.00
+ 120.00
+
+
+
+
+
+``invoice.py``:
+
+.. code-block:: python
+
+ from pathlib import Path
+ from weasyprint import Attachment, HTML
+
+ def generate_rdf_metadata(metadata, variant, version, conformance):
+ original_rdf = generate_original_rdf_metadata(metadata, variant, version, conformance)
+ return Path("rdf.xml").read_bytes().replace(b"", original_rdf)
+
+ document = HTML(string="Invoice
").render()
+ generate_original_rdf_metadata = document.metadata.generate_rdf_metadata
+
+ factur_x_xml = Path("factur-x.xml").read_text()
+ attachment = Attachment(string=factur_x_xml, name="factur-x.xml", relationship="Data")
+ document.metadata.attachments = [attachment]
+
+ document.metadata.generate_rdf_metadata = generate_rdf_metadata
+ document.write_pdf("invoice.pdf", pdf_variant="pdf/a-3b")
+
+Of course, the content of these files has to be adapted to the content of real
+invoices. Using XML generators instead of plain text manipulation is also
+highly recommended.
+
+A more detailed blog article is available on `Binary Butterfly’s website
+`_.
+
Include PDF Forms
-----------------
diff --git a/tests/test_api.py b/tests/test_api.py
index 48b3efe63..5ee0644b0 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -18,6 +18,7 @@
from weasyprint import CSS, HTML, __main__, default_url_fetcher
from weasyprint.pdf.anchors import resolve_links
+from weasyprint.pdf.metadata import generate_rdf_metadata
from weasyprint.urls import path2url
from .draw import parse_pixels
@@ -414,14 +415,14 @@ def test_command_line_render(tmp_path):
os.environ.pop('SOURCE_DATE_EPOCH')
stdout = _run('combined.html --uncompressed-pdf -')
- assert stdout.count(b'attachment') == 0
+ assert stdout.count(b'Filespec') == 0
stdout = _run('combined.html --uncompressed-pdf -')
- assert stdout.count(b'attachment') == 0
+ assert stdout.count(b'Filespec') == 0
stdout = _run('-a pattern.png --uncompressed-pdf combined.html -')
- assert stdout.count(b'attachment') == 1
+ assert stdout.count(b'Filespec') == 1
stdout = _run(
'-a style.css -a pattern.png --uncompressed-pdf combined.html -')
- assert stdout.count(b'attachment') == 2
+ assert stdout.count(b'Filespec') == 2
_run('combined.html out23.pdf --timeout 30')
assert (tmp_path / 'out23.pdf').read_bytes() == pdf_bytes
@@ -1140,6 +1141,7 @@ def assert_meta(html, **meta):
meta.setdefault('attachments', [])
meta.setdefault('lang', None)
meta.setdefault('custom', {})
+ meta.setdefault('generate_rdf_metadata', generate_rdf_metadata)
assert vars(FakeHTML(string=html).render().metadata) == meta
diff --git a/tests/test_pdf.py b/tests/test_pdf.py
index d657e6d9a..ee24535da 100644
--- a/tests/test_pdf.py
+++ b/tests/test_pdf.py
@@ -598,7 +598,7 @@ def test_embedded_files_attachments(tmp_path):
]
)
assert f'<{hashlib.md5(b"hi there").hexdigest()}>'.encode() in pdf
- assert b'/F ()' in pdf
+ assert b'/F (attachment.bin)' in pdf
assert b'/UF (attachment.bin)' in pdf
name = BOM_UTF16_BE + 'some file attachment äöü'.encode('utf-16-be')
assert b'/Desc <' + name.hex().encode() + b'>' in pdf
@@ -716,3 +716,29 @@ def test_bleed(style, media, bleed, trim):
assert f'/MediaBox {str(media).replace(",", "")}'.encode() in pdf
assert f'/BleedBox {str(bleed).replace(",", "")}'.encode() in pdf
assert f'/TrimBox {str(trim).replace(",", "")}'.encode() in pdf
+
+
+@assert_no_logs
+def test_default_rdf_metadata():
+ pdf_document = FakeHTML(string='test').render()
+
+ pdf_document.metadata.title = None
+
+ pdf_bytes = pdf_document.write_pdf(
+ pdf_variant='pdf/a-3b', pdf_identifier=b'example-bytes', uncompressed_pdf=True)
+ assert b'test').render()
+
+ pdf_document.metadata.title = None
+ pdf_document.metadata.generate_rdf_metadata = generate_rdf_metadata
+
+ pdf_bytes = pdf_document.write_pdf(
+ pdf_variant='pdf/a-3b', pdf_identifier=b'example-bytes', uncompressed_pdf=True)
+ assert b'TEST_METADATA' in pdf_bytes
diff --git a/weasyprint/__init__.py b/weasyprint/__init__.py
index 40635bf0a..a3acac430 100644
--- a/weasyprint/__init__.py
+++ b/weasyprint/__init__.py
@@ -318,6 +318,9 @@ class Attachment:
HTML specific arguments (``encoding`` and ``media_type``) are not
supported.
+ :param str name:
+ The name of the attachment to be included in the PDF document.
+ May be :obj:`None`.
:param str description:
A description of the attachment to be included in the PDF document.
May be :obj:`None`.
@@ -335,11 +338,12 @@ class Attachment:
"""
def __init__(self, guess=None, filename=None, url=None, file_obj=None,
string=None, base_url=None, url_fetcher=default_url_fetcher,
- description=None, created=None, modified=None,
+ name=None, description=None, created=None, modified=None,
relationship='Unspecified'):
self.source = _select_source(
guess, filename, url, file_obj, string, base_url=base_url,
url_fetcher=url_fetcher)
+ self.name = name
self.description = description
self.relationship = relationship
self.md5 = None
diff --git a/weasyprint/document.py b/weasyprint/document.py
index f630fe6a5..235cf2c18 100644
--- a/weasyprint/document.py
+++ b/weasyprint/document.py
@@ -18,6 +18,7 @@
from .logger import PROGRESS_LOGGER
from .matrix import Matrix
from .pdf import VARIANTS, generate_pdf
+from .pdf.metadata import generate_rdf_metadata
from .text.fonts import FontConfiguration
@@ -105,12 +106,10 @@ class DocumentMetadata:
"""Meta-information belonging to a whole :class:`Document`.
New attributes may be added in future versions of WeasyPrint.
-
"""
-
- def __init__(self, title=None, authors=None, description=None,
- keywords=None, generator=None, created=None, modified=None,
- attachments=None, lang=None, custom=None):
+ def __init__(self, title=None, authors= None, description=None, keywords=None,
+ generator=None, created=None, modified=None, attachments=None,
+ lang=None, custom=None, generate_rdf_metadata=generate_rdf_metadata):
#: The title of the document, as a string or :obj:`None`.
#: Extracted from the ```` element in HTML
#: and written to the ``/Title`` info field in PDF.
@@ -156,6 +155,9 @@ def __init__(self, title=None, authors=None, description=None,
#: Custom metadata, as a dict whose keys are the metadata names and
#: values are the metadata values.
self.custom = custom or {}
+ #: RDF metadata generator, called with ``(metadata, variant, version,
+ #: conformance)``. It must return the RDF XML as bytes; replace it to customize.
+ self.generate_rdf_metadata = generate_rdf_metadata
class DiskCache:
diff --git a/weasyprint/pdf/__init__.py b/weasyprint/pdf/__init__.py
index 27dce58ce..65caf72de 100644
--- a/weasyprint/pdf/__init__.py
+++ b/weasyprint/pdf/__init__.py
@@ -271,7 +271,7 @@ def generate_pdf(document, target, zoom, **options):
if pdf_attachments:
content = pydyf.Dictionary({'Names': pydyf.Array()})
for i, pdf_attachment in enumerate(pdf_attachments):
- content['Names'].append(pydyf.String(f'attachment{i}'))
+ content['Names'].append(pdf_attachment['F'])
content['Names'].append(pdf_attachment.reference)
pdf.add_object(content)
if 'Names' not in pdf.catalog:
diff --git a/weasyprint/pdf/anchors.py b/weasyprint/pdf/anchors.py
index ebf489cdc..2c3858ac7 100644
--- a/weasyprint/pdf/anchors.py
+++ b/weasyprint/pdf/anchors.py
@@ -351,7 +351,9 @@ def write_pdf_attachment(pdf, attachment, compress):
# TODO: Use the result object from a URL fetch operation to provide more
# details on the possible filename and MIME type.
- if url and urlsplit(url).path:
+ if attachment.name:
+ filename = attachment.name
+ elif url and urlsplit(url).path:
filename = basename(unquote(urlsplit(url).path))
else:
filename = 'attachment.bin'
@@ -376,7 +378,7 @@ def write_pdf_attachment(pdf, attachment, compress):
pdf_attachment = pydyf.Dictionary({
'Type': '/Filespec',
- 'F': pydyf.String(),
+ 'F': pydyf.String(filename.encode(errors='ignore')),
'UF': pydyf.String(filename),
'EF': pydyf.Dictionary({'F': file_stream.reference}),
'Desc': pydyf.String(attachment.description or ''),
diff --git a/weasyprint/pdf/metadata.py b/weasyprint/pdf/metadata.py
index 7cde641f2..12ce45282 100644
--- a/weasyprint/pdf/metadata.py
+++ b/weasyprint/pdf/metadata.py
@@ -25,7 +25,22 @@ def add_metadata(pdf, metadata, variant, version, conformance, compress):
Described in ISO-32000-1:2008, 14.3.2.
"""
- # Add metadata
+ header = b''
+ footer = b''
+ xml_data = metadata.generate_rdf_metadata(metadata, variant, version, conformance)
+ stream_content = b'\n'.join((header, xml_data, footer))
+ extra = {'Type': '/Metadata', 'Subtype': '/XML'}
+ metadata = pydyf.Stream([stream_content], extra, compress)
+ pdf.add_object(metadata)
+ pdf.catalog['Metadata'] = metadata.reference
+
+
+def generate_rdf_metadata(metadata, variant, version, conformance):
+ """Generate RDF metadata as a bytestring.
+
+ Can be overridden by setting ``DocumentMetadata.generate_rdf_metadata``.
+
+ """
namespace = f'pdf{variant}id'
rdf = Element(f'{{{NS["rdf"]}}}RDF')
@@ -82,11 +97,4 @@ def add_metadata(pdf, metadata, variant, version, conformance, compress):
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
element = SubElement(element, f'{{{NS["xmp"]}}}ModifyDate')
element.text = metadata.modified
- xml = tostring(rdf, encoding='utf-8')
- header = b''
- footer = b''
- stream_content = b'\n'.join((header, xml, footer))
- extra = {'Type': '/Metadata', 'Subtype': '/XML'}
- metadata = pydyf.Stream([stream_content], extra, compress)
- pdf.add_object(metadata)
- pdf.catalog['Metadata'] = metadata.reference
+ return tostring(rdf, encoding='utf-8')