Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add booklet command #77

Merged
merged 4 commits into from
Dec 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 159 additions & 0 deletions pdfly/booklet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
"""
Reorder and two-up PDF pages for booklet printing.

If the number of pages is not a multiple of four, pages are
added until it is a multiple of four. This includes a centerfold
in the middle of the booklet and a single page on the inside
back cover. The content of those pages are from the
centerfold-file and blank-page-file files, if specified, otherwise
they are blank pages.

Example:
pdfly booklet input.pdf output.pdf

"""

# Copyright (c) 2014, Steve Witham <[email protected]>.
# All rights reserved. This software is available under a BSD license;
# see https://github.com/py-pdf/pypdf/LICENSE

import sys
import traceback
from pathlib import Path
from typing import Generator, Optional, Tuple

from pypdf import (
PageObject,
PdfReader,
PdfWriter,
)
from pypdf.generic import RectangleObject


def main(
    filename: Path,
    output: Path,
    inside_cover_file: Optional[Path],
    centerfold_file: Optional[Path],
) -> None:
    """
    Build a two-up booklet from ``filename`` and write it to ``output``.

    The page list is padded to a multiple of four: an odd page count gets
    one extra page inserted just before the final page, and a count that is
    then still two short gets a pair of blank pages inserted in the middle.
    The pages are merged side by side in the order given by ``page_iter``.

    Args:
        filename: The PDF to turn into a booklet.
        output: The path the resulting PDF is written to.
        inside_cover_file: Optional PDF whose first page is used instead of
            a blank page when the input has an odd number of pages.
        centerfold_file: Optional PDF whose first page is merged onto the
            last sheet written when a middle pair had to be inserted.

    Any exception is printed to stderr with its traceback, followed by an
    error line naming ``filename``, and the process exits with status 1.

    """
    try:
        source = PdfReader(filename)
        sheets = list(source.pages)
        writer = PdfWriter()

        # Padding pages take the size of the first input page.
        filler = PageObject.create_blank_page(
            width=sheets[0].mediabox.width,
            height=sheets[0].mediabox.height,
        )

        if len(sheets) % 2 == 1:
            # insert(-1, ...) places the extra page just before the final
            # page, not at the very end of the list.
            extra = (
                fetch_first_page(inside_cover_file)
                if inside_cover_file
                else filler
            )
            sheets.insert(-1, extra)

        requires_centerfold = len(sheets) % 4 == 2
        if requires_centerfold:
            # Two filler pages in the middle bring the count to a multiple
            # of four; the midpoint is recomputed for each insertion.
            for _ in range(2):
                sheets.insert(len(sheets) // 2, filler)

        # Merge each right-hand page next to its left-hand page (2-up) and
        # emit the combined sheet side.
        for left_index, right_index in page_iter(len(sheets)):
            left = sheets[left_index]
            left.merge_translated_page(
                page2=sheets[right_index],
                tx=left.mediabox.width,
                ty=0,
                expand=True,
                over=True,
            )
            writer.add_page(left)

        # page_iter yields the innermost pair last, so the final page in
        # the writer is the (so far blank) middle pair. Overlay the
        # user-supplied centerfold on it when one was given.
        if requires_centerfold and centerfold_file:
            overlay = fetch_first_page(centerfold_file)
            middle = writer.pages[-1]
            if overlay.rotation != 0:
                overlay.transfer_rotation_to_content()
            if requires_rotate(overlay.mediabox, middle.mediabox):
                overlay = overlay.rotate(270)
            if overlay.rotation != 0:
                overlay.transfer_rotation_to_content()
            middle.merge_page(overlay)

        with open(output, "wb") as output_fh:
            writer.write(output_fh)

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print(f"Error while reading {filename}", file=sys.stderr)
        sys.exit(1)


def requires_rotate(a: RectangleObject, b: RectangleObject) -> bool:
    """
    Tell whether two rectangles differ in orientation.

    A rectangle counts as portrait when its height is strictly greater
    than its width; anything else (including a square) does not.

    Args:
        a (RectangleObject): The first rectangle.
        b (RectangleObject): The second rectangle.

    Returns:
        bool: True when exactly one of the two rectangles is portrait.

    """
    portrait_flags = [box.height > box.width for box in (a, b)]
    return portrait_flags[0] != portrait_flags[1]


def fetch_first_page(filename: Path) -> PageObject:
    """
    Open a PDF file and hand back its first page.

    Args:
        filename (Path): The path to the PDF file.

    Returns:
        PageObject: The page at index 0 of the opened PDF.

    """
    document = PdfReader(filename)
    return document.pages[0]


# This function written with inspiration, assistance, and code
# from claude.ai & Github Copilot
def page_iter(num_pages: int) -> Generator[Tuple[int, int], None, None]:
    """
    Yield the zero-based page index pairs that make up a booklet.

    Two pairs are produced per sheet of paper, working from the outermost
    sheet inwards: first the outside of the fold, then the inside of the
    fold.

    Args:
        num_pages (int): The total number of pages. Must be divisible by 4.

    Yields:
        Tuple[int, int]: The two page indices that share one side of a sheet.

    Raises:
        ValueError: If the number of pages is not divisible by 4.

    """
    if num_pages % 4 != 0:
        raise ValueError("Number of pages must be divisible by 4")

    # ``offset`` is the index of the sheet's first page; each sheet uses two
    # pages from the front half and two from the back half of the document.
    for offset in range(0, num_pages // 2, 2):
        # Outside the fold: page counted from the end, then from the start.
        yield num_pages - 1 - offset, offset
        # Inside the fold: the next page from the start, then from the end.
        yield offset + 1, num_pages - 2 - offset
47 changes: 46 additions & 1 deletion pdfly/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
"""

from pathlib import Path
from typing import List
from typing import List, Optional

import typer
from typing_extensions import Annotated

import pdfly.booklet
import pdfly.cat
import pdfly.compress
import pdfly.extract_images
Expand Down Expand Up @@ -98,6 +99,50 @@ def cat(
pdfly.cat.main(filename, fn_pgrgs, output, verbose)


# Register the "booklet" sub-command. Its help text is the module docstring
# of pdfly.booklet.
@entry_point.command(name="booklet", help=pdfly.booklet.__doc__)  # type: ignore[misc]
def booklet(
    # Positional argument: the input PDF. Declared with exists=True and
    # dir_okay=False, and resolved to a full path.
    filename: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    # Positional argument: the output PDF. Declared with exists=False and
    # passed on unresolved.
    output: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=False,
            resolve_path=False,
        ),
    ],
    # Optional -b/--blank-page-file. NOTE: this value is handed to
    # pdfly.booklet.main as its third positional argument, which that
    # function names ``inside_cover_file``.
    blank_page: Annotated[
        Optional[Path],
        typer.Option(
            "-b",
            "--blank-page-file",
            help="page added if input is odd number of pages",
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ] = None,
    # Optional -c/--centerfold-file, forwarded as ``centerfold_file``.
    centerfold: Annotated[
        Optional[Path],
        typer.Option(
            "-c",
            "--centerfold-file",
            help="double-page added if input is missing >= 2 pages",
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ] = None,
) -> None:
    # Thin wrapper: all work happens in pdfly.booklet.main.
    pdfly.booklet.main(filename, output, blank_page, centerfold)


@entry_point.command(name="rm", help=pdfly.rm.__doc__)
def rm(
filename: Annotated[
Expand Down
Binary file added resources/b.pdf
Binary file not shown.
Binary file added resources/c.pdf
Binary file not shown.
Binary file added resources/input8.pdf
Binary file not shown.
121 changes: 121 additions & 0 deletions tests/test_booklet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import pytest
from pypdf import PdfReader

from .conftest import RESOURCES_ROOT, chdir, run_cli


def test_booklet_fewer_args(capsys, tmp_path):
    """The booklet command reports a usage error when OUTPUT is omitted."""
    with chdir(tmp_path):
        # Invoke "booklet" (not "cat") so that this test really covers the
        # argument parsing of the command it is named after.
        exit_code = run_cli(["booklet", str(RESOURCES_ROOT / "box.pdf")])
    assert exit_code == 2
    captured = capsys.readouterr()
    assert "Missing argument" in captured.err


def test_booklet_extra_args(capsys, tmp_path):
    """The booklet command reports a usage error for a third positional."""
    argv = ["booklet", str(RESOURCES_ROOT / "box.pdf"), "a.pdf", "b.pdf"]
    with chdir(tmp_path):
        status = run_cli(argv)
    assert status == 2
    streams = capsys.readouterr()
    assert "unexpected extra argument" in streams.err


def test_booklet_page_size(capsys, tmp_path):
    """An 8-page input becomes 4 output pages, each twice as wide."""
    in_fname = str(RESOURCES_ROOT / "input8.pdf")

    with chdir(tmp_path):
        exit_code = run_cli(
            [
                "booklet",
                in_fname,
                "output8.pdf",
            ]
        )
        # Check the exit code before opening the output file, so that a
        # failed run is reported with the command's stderr instead of as an
        # error while opening a file that was never written.
        captured = capsys.readouterr()
        assert exit_code == 0, captured.err

        in_reader = PdfReader(in_fname)
        out_reader = PdfReader("output8.pdf")

        assert len(in_reader.pages) == 8
        assert len(out_reader.pages) == 4

        in_height = in_reader.pages[0].mediabox.height
        in_width = in_reader.pages[0].mediabox.width
        out_height = out_reader.pages[0].mediabox.height
        out_width = out_reader.pages[0].mediabox.width

        # Two input pages sit side by side on every output page.
        assert out_width == in_width * 2
        assert in_height == out_height


@pytest.mark.parametrize(
    ("page_count", "expected", "expected_bc"),
    [
        ("8", "81\n27\n63\n45\n", "81\n27\n63\n45\n"),
        ("7", "71\n2\n63\n45\n", "71\n2b\n63\n45\n"),
        ("6", "61\n25\n43\n\n", "61\n25\n43\nc\n"),
        ("5", "51\n2\n43\n\n", "51\n2b\n43\nc\n"),
        ("4", "41\n23\n", "41\n23\n"),
        ("3", "31\n2\n", "31\n2b\n"),
        ("2", "21\n\n", "21\nc\n"),
        ("1", "1\n\n", "1b\nc\n"),
    ],
)
def test_booklet_order(capsys, tmp_path, page_count, expected, expected_bc):
    """
    Compare the extracted text of a booklet against the expected order.

    For each page count an input is produced with ``cat`` from input8.pdf.
    The booklet is then built twice: once without extra files (compared
    with ``expected``) and once with the centerfold and blank-page files
    (compared with ``expected_bc``).
    """
    source_name = f"input{page_count}.pdf"
    plain_name = f"output{page_count}.pdf"
    filled_name = f"outputbc{page_count}.pdf"

    with chdir(tmp_path):
        # Produce the input file for this page count.
        status = run_cli(
            [
                "cat",
                "-o",
                source_name,
                str(RESOURCES_ROOT / "input8.pdf"),
                f":{page_count}",
            ]
        )
        assert status == 0

        # Booklet without any filler files.
        status = run_cli(["booklet", source_name, plain_name])
        streams = capsys.readouterr()
        assert status == 0, streams.err

        status = run_cli(["extract-text", plain_name])
        streams = capsys.readouterr()
        assert status == 0, streams.err
        assert streams.out == expected

        # Booklet with both the centerfold and the blank-page file.
        status = run_cli(
            [
                "booklet",
                "--centerfold-file",
                str(RESOURCES_ROOT / "c.pdf"),
                "--blank-page-file",
                str(RESOURCES_ROOT / "b.pdf"),
                source_name,
                filled_name,
            ]
        )
        streams = capsys.readouterr()
        assert status == 0, streams.err

        status = run_cli(["extract-text", filled_name])
        streams = capsys.readouterr()
        assert status == 0, streams.err
        assert streams.out == expected_bc
Loading