class Transformation:
    """
    Specify a 2D transformation.

    The transformation between two coordinate systems is represented by a
    3-by-3 transformation matrix written as follows::

        a b 0
        c d 0
        e f 1

    Because a transformation matrix has only six elements that can be changed,
    it is usually specified in PDF as the six-element array [ a b c d e f ].

    Coordinate transformations are expressed as matrix multiplications::

                                  a b 0
        [ x′ y′ 1 ] = [ x y 1 ] × c d 0
                                  e f 1

    ``translate``, ``scale`` and ``rotate`` each return a new
    ``Transformation`` and leave the receiver untouched. Each call
    post-multiplies the current matrix, so in a chain the leftmost operation
    is applied to a point first.

    Usage
    -----
    >>> from PyPDF2 import Transformation
    >>> op = Transformation().scale(sx=2, sy=3).translate(tx=10, ty=20)
    >>> page.mergeTransformedPage(page2, op)
    """

    # 9.5.4 Coordinate Systems for 3D
    # 4.2.2 Common Transformations
    def __init__(self, ctm: CompressedTransformationMatrix = (1, 0, 0, 1, 0, 0)):
        """
        :param ctm: The six-element form ``(a, b, c, d, e, f)``. The default
            is the identity transformation.
        """
        self.ctm = ctm

    @property
    def matrix(self) -> TransformationMatrixType:
        """Return the transformation expanded to its 3-by-3 form (rows as tuples)."""
        return (
            (self.ctm[0], self.ctm[1], 0),
            (self.ctm[2], self.ctm[3], 0),
            (self.ctm[4], self.ctm[5], 1),
        )

    @staticmethod
    def compress(matrix: TransformationMatrixType) -> CompressedTransformationMatrix:
        """
        Collapse a 3-by-3 transformation matrix to ``(a, b, c, d, e, f)``.

        This is the inverse of :attr:`matrix`, i.e.
        ``Transformation.compress(t.matrix) == t.ctm``.

        :param matrix: Three rows of three numbers each.
        """
        return (
            matrix[0][0],
            matrix[0][1],
            matrix[1][0],
            matrix[1][1],
            # The translation (e, f) lives in the third ROW. The third column
            # is the constant (0, 0, 1); reading it would always yield 0 and
            # drop any translation accumulated so far.
            matrix[2][0],
            matrix[2][1],
        )

    def translate(self, tx: float = 0, ty: float = 0) -> "Transformation":
        """
        Return a copy of this transformation followed by a translation.

        :param tx: Offset added to the horizontal component ``e``.
        :param ty: Offset added to the vertical component ``f``.
        """
        # Post-multiplying by a pure translation only changes e and f, so the
        # full matrix product is not needed.
        m = self.ctm
        return Transformation(ctm=(m[0], m[1], m[2], m[3], m[4] + tx, m[5] + ty))

    def scale(
        self, sx: Optional[float] = None, sy: Optional[float] = None
    ) -> "Transformation":
        """
        Return a copy of this transformation followed by a scaling.

        If only one factor is given it is used for both axes.

        :param sx: The scaling factor on the horizontal axis.
        :param sy: The scaling factor on the vertical axis.
        :raises ValueError: If neither ``sx`` nor ``sy`` is given.
        """
        if sx is None:
            if sy is None:
                raise ValueError("Either sx or sy must be specified")
            sx = sy
        elif sy is None:
            sy = sx
        op: TransformationMatrixType = ((sx, 0, 0), (0, sy, 0), (0, 0, 1))
        ctm = Transformation.compress(matrixMultiply(self.matrix, op))
        return Transformation(ctm)

    def rotate(self, rotation: float) -> "Transformation":
        """
        Return a copy of this transformation followed by a rotation.

        :param rotation: The angle in degrees. With the row-vector convention
            described on the class, a positive angle turns points
            counter-clockwise in a coordinate system whose y axis points up.
        """
        angle = math.radians(rotation)
        cos_a = math.cos(angle)
        sin_a = math.sin(angle)
        op: TransformationMatrixType = (
            (cos_a, sin_a, 0),
            (-sin_a, cos_a, 0),
            (0, 0, 1),
        )
        ctm = Transformation.compress(matrixMultiply(self.matrix, op))
        return Transformation(ctm)

    def __repr__(self) -> str:
        return f"Transformation(ctm={self.ctm})"
""" + if isinstance(ctm, Transformation): + ctm = ctm.ctm + ctm = cast(CompressedTransformationMatrix, ctm) self._mergePage( page2, lambda page2Content: PageObject._addTransformationMatrix( - page2Content, page2.pdf, ctm + page2Content, page2.pdf, ctm # type: ignore[arg-type] ), ctm, expand, @@ -459,8 +542,8 @@ def mergeScaledPage( :param bool expand: Whether the page should be expanded to fit the dimensions of the page to be merged. """ - # CTM to scale : [ sx 0 0 sy 0 0 ] - self.mergeTransformedPage(page2, [scale, 0, 0, scale, 0, 0], expand) + op = Transformation().scale(scale, scale) + self.mergeTransformedPage(page2, op, expand) def mergeRotatedPage( self, page2: "PageObject", rotation: float, expand: bool = False @@ -475,19 +558,8 @@ def mergeRotatedPage( :param bool expand: Whether the page should be expanded to fit the dimensions of the page to be merged. """ - rotation = math.radians(rotation) - self.mergeTransformedPage( - page2, - [ - math.cos(rotation), - math.sin(rotation), - -math.sin(rotation), - math.cos(rotation), - 0, - 0, - ], - expand, - ) + op = Transformation().rotate(rotation) + self.mergeTransformedPage(page2, op, expand) def mergeTranslatedPage( self, page2: "PageObject", tx: float, ty: float, expand: bool = False @@ -503,7 +575,8 @@ def mergeTranslatedPage( :param bool expand: Whether the page should be expanded to fit the dimensions of the page to be merged. """ - self.mergeTransformedPage(page2, [1, 0, 0, 1, tx, ty], expand) + op = Transformation().translate(tx, ty) + self.mergeTransformedPage(page2, op, expand) def mergeRotatedTranslatedPage( self, @@ -525,23 +598,8 @@ def mergeRotatedTranslatedPage( :param bool expand: Whether the page should be expanded to fit the dimensions of the page to be merged. 
""" - - translation: List[List[float]] = [[1, 0, 0], [0, 1, 0], [-tx, -ty, 1]] - rotation = math.radians(rotation) - rotating: List[List[float]] = [ - [math.cos(rotation), math.sin(rotation), 0], - [-math.sin(rotation), math.cos(rotation), 0], - [0, 0, 1], - ] - rtranslation: List[List[float]] = [[1, 0, 0], [0, 1, 0], [tx, ty, 1]] - ctm = matrixMultiply(translation, rotating) - ctm = matrixMultiply(ctm, rtranslation) - - return self.mergeTransformedPage( - page2, - [ctm[0][0], ctm[0][1], ctm[1][0], ctm[1][1], ctm[2][0], ctm[2][1]], - expand, - ) + op = Transformation().translate(-tx, -ty).rotate(rotation).translate(tx, ty) + return self.mergeTransformedPage(page2, op, expand) def mergeRotatedScaledPage( self, page2: "PageObject", rotation: float, scale: float, expand: bool = False @@ -557,20 +615,8 @@ def mergeRotatedScaledPage( :param bool expand: Whether the page should be expanded to fit the dimensions of the page to be merged. """ - rotation = math.radians(rotation) - rotating: List[List[float]] = [ - [math.cos(rotation), math.sin(rotation), 0], - [-math.sin(rotation), math.cos(rotation), 0], - [0, 0, 1], - ] - scaling: List[List[float]] = [[scale, 0, 0], [0, scale, 0], [0, 0, 1]] - ctm = matrixMultiply(rotating, scaling) - - self.mergeTransformedPage( - page2, - [ctm[0][0], ctm[0][1], ctm[1][0], ctm[1][1], ctm[2][0], ctm[2][1]], - expand, - ) + op = Transformation().rotate(rotation).scale(scale, scale) + self.mergeTransformedPage(page2, op, expand) def mergeScaledTranslatedPage( self, @@ -592,16 +638,8 @@ def mergeScaledTranslatedPage( :param bool expand: Whether the page should be expanded to fit the dimensions of the page to be merged. 
""" - - translation: List[List[float]] = [[1, 0, 0], [0, 1, 0], [tx, ty, 1]] - scaling: List[List[float]] = [[scale, 0, 0], [0, scale, 0], [0, 0, 1]] - ctm = matrixMultiply(scaling, translation) - - return self.mergeTransformedPage( - page2, - [ctm[0][0], ctm[0][1], ctm[1][0], ctm[1][1], ctm[2][0], ctm[2][1]], - expand, - ) + op = Transformation().scale(scale, scale).translate(tx, ty) + return self.mergeTransformedPage(page2, op, expand) def mergeRotatedScaledTranslatedPage( self, @@ -626,24 +664,10 @@ def mergeRotatedScaledTranslatedPage( :param bool expand: Whether the page should be expanded to fit the dimensions of the page to be merged. """ - translation: List[List[float]] = [[1, 0, 0], [0, 1, 0], [tx, ty, 1]] - rotation = math.radians(rotation) - rotating: List[List[float]] = [ - [math.cos(rotation), math.sin(rotation), 0], - [-math.sin(rotation), math.cos(rotation), 0], - [0, 0, 1], - ] - scaling: List[List[float]] = [[scale, 0, 0], [0, scale, 0], [0, 0, 1]] - ctm = matrixMultiply(rotating, scaling) - ctm = matrixMultiply(ctm, translation) - - self.mergeTransformedPage( - page2, - [ctm[0][0], ctm[0][1], ctm[1][0], ctm[1][1], ctm[2][0], ctm[2][1]], - expand, - ) + op = Transformation().rotate(rotation).scale(scale, scale).translate(tx, ty) + self.mergeTransformedPage(page2, op, expand) - def addTransformation(self, ctm: List[float]) -> None: + def addTransformation(self, ctm: CompressedTransformationMatrix) -> None: """ Apply a transformation matrix to the page. @@ -666,7 +690,7 @@ def scale(self, sx: float, sy: float) -> None: :param float sx: The scaling factor on horizontal axis. :param float sy: The scaling factor on vertical axis. 
""" - self.addTransformation([sx, 0, 0, sy, 0, 0]) + self.addTransformation((sx, 0, 0, sy, 0, 0)) self.mediaBox = RectangleObject( ( float(self.mediaBox.getLowerLeft_x()) * sx, diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py index 968a32321..67fc20fe2 100644 --- a/PyPDF2/filters.py +++ b/PyPDF2/filters.py @@ -32,9 +32,9 @@ import math import struct -from io import StringIO,BytesIO -from typing import Any, Dict, Optional, Tuple, Union import zlib +from io import BytesIO, StringIO +from typing import Any, Dict, Optional, Tuple, Union from .generic import ArrayObject, DictionaryObject, NameObject @@ -47,12 +47,12 @@ from .constants import ColorSpaces from .constants import FilterTypeAbbreviations as FTA from .constants import FilterTypes as FT +from .constants import GraphicsStateParameters as G from .constants import ImageAttributes as IA from .constants import LzwFilterParameters as LZW from .constants import StreamAttributes as SA -from .constants import GraphicsStateParameters as G from .errors import PdfReadError, PdfStreamError -from .utils import b_,ord_, paethPredictor +from .utils import b_, ord_, paethPredictor def decompress(data: bytes) -> bytes: diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py index f275a5286..60c842862 100644 --- a/PyPDF2/generic.py +++ b/PyPDF2/generic.py @@ -44,7 +44,6 @@ from .constants import StreamAttributes as SA from .constants import TypArguments as TA from .constants import TypFitArguments as TF - from .errors import ( STREAM_TRUNCATED_PREMATURELY, PdfReadError, @@ -1340,7 +1339,6 @@ def __init__( self[NameObject("/Page")] = page self[NameObject("/Type")] = typ - # from table 8.2 of the PDF 1.7 reference. 
def matrixMultiply(
    a: TransformationMatrixType, b: TransformationMatrixType
) -> TransformationMatrixType:
    """
    Return the matrix product ``a × b`` as a tuple of row tuples.

    Every entry is passed through ``float()`` before multiplying.

    :param a: Left operand, given as a sequence of rows.
    :param b: Right operand, given as a sequence of rows.

    No dimension check is performed: rows and columns are paired with
    ``zip``, so a length mismatch between a row of ``a`` and a column of
    ``b`` is silently truncated to the shorter one.
    """
    # Transpose b once; the columns are the same for every row of a.
    columns = tuple(zip(*b))
    return tuple(  # type: ignore[return-value]
        tuple(sum(float(i) * float(j) for i, j in zip(row, col)) for col in columns)
        for row in a
    )
StreamType RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" diff --git a/docs/user/cropping-and-transforming.md b/docs/user/cropping-and-transforming.md index e1958a4f4..6f3f60186 100644 --- a/docs/user/cropping-and-transforming.md +++ b/docs/user/cropping-and-transforming.md @@ -29,3 +29,73 @@ writer.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});") with open("PyPDF2-output.pdf", "wb") as fp: writer.write(fp) ``` + +## Plain Merge + +![](plain-merge.png) + +is the result of + +```python +from PyPDF2 import PdfFileReader, PdfFileWriter, Transformation + +# Get the data +reader_base = PdfFileReader("labeled-edges-center-image.pdf") +page_base = reader_base.pages[0] + +reader = PdfFileReader("box.pdf") +page_box = reader.pages[0] + +# Apply the transformation. Be aware that this is an in-place operation. +page_base.mergeTransformedPage(page_box, Transformation()) + +# Write the result back +writer = PdfFileWriter() +writer.addPage(page_base) +with open("merged-foo.pdf", "wb") as fp: + writer.write(fp) +``` + +## Merge with Rotation + +![](merge-45-deg-rot.png) + +```python +from PyPDF2 import PdfFileReader, PdfFileWriter, Transformation + +# Get the data +reader_base = PdfFileReader("labeled-edges-center-image.pdf") +page_base = reader_base.pages[0] + +reader = PdfFileReader("box.pdf") +page_box = reader.pages[0] + +# Apply the transformation. Be aware that this is an in-place operation. +op = Transformation().rotate(45) +page_base.mergeTransformedPage(page_box, op) + +# Write the result back +writer = PdfFileWriter() +writer.addPage(page_base) +with open("merged-foo.pdf", "wb") as fp: + writer.write(fp) +``` + +If you add the expand parameter: + +```python +op = Transformation().rotate(45) +page_base.mergeTransformedPage(page_box, op, expand=True) +``` + +you get: + +![](merge-rotate-expand.png) + +Alternatively, you can move the merged image a bit to the right by using + +```python +op = Transformation().rotate(45).translate(tx=50) +```
def test_page_scale():
    """Transformation.scale needs a factor and reuses a lone one for both axes."""
    identity = Transformation()

    # Neither sx nor sy given: there is nothing to fall back on.
    with pytest.raises(ValueError) as excinfo:
        identity.scale()
    assert excinfo.value.args[0] == "Either sx or sy must be specified"

    # A single factor is applied to the horizontal and the vertical axis.
    only_sx = identity.scale(sx=2)
    only_sy = identity.scale(sy=3)
    assert only_sx.ctm == (2, 0, 0, 2, 0, 0)
    assert only_sy.ctm == (3, 0, 0, 3, 0, 0)
b/tests/test_utils.py index 8c4feb7e8..78fa68cb3 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -63,9 +63,9 @@ def test_readUntilRegex_premature_ending_name(): @pytest.mark.parametrize( ("a", "b", "expected"), [ - ([[3]], [[7]], [[21]]), - ([[3, 7]], [[5], [13]], [[3 * 5.0 + 7 * 13]]), - ([[3], [7]], [[5, 13]], [[3 * 5, 3 * 13], [7 * 5, 7 * 13]]), + (((3,),), ((7,),), ((21,),)), + (((3, 7),), ((5,), (13,)), ((3 * 5.0 + 7 * 13,),)), + (((3,), (7,)), ((5, 13),), ((3 * 5, 3 * 13), (7 * 5, 7 * 13))), ], ) def test_matrixMultiply(a, b, expected):