MAINT: Add initial type support with mypy (#853)

This includes adding a type marker file (PyPDF2/py.typed).
py-pdf · May 2, 2022 · b580a45
1 parent f06375e
commit b580a45
Show file tree

Hide file tree

Showing 11 changed files with 180 additions and 122 deletions.
diff --git a/.github/workflows/github-ci.yaml b/.github/workflows/github-ci.yaml
@@ -52,6 +52,9 @@ jobs:
       run: |
         python -OO -m coverage run --parallel-mode -m pytest tests -vv
       if: matrix.python-version == '3.10.1'
+    - name: Test with mypy
+      run : |
+        mypy PyPDF2 --show-error-codes
     - name: Upload coverage data
       uses: actions/upload-artifact@v3
       with:

diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py
@@ -34,6 +34,11 @@
 import struct
 from io import StringIO
 
+try:
+    from typing import Literal  # type: ignore[attr-defined]
+except ImportError:
+    from typing_extensions import Literal  # type: ignore[misc]
+
 from PyPDF2.constants import CcittFaxDecodeParameters as CCITT
 from PyPDF2.constants import ColorSpaces
 from PyPDF2.constants import FilterTypeAbbreviations as FTA
@@ -66,8 +71,8 @@ def compress(data):
 except ImportError:  # pragma: no cover
     # Unable to import zlib.  Attempt to use the System.IO.Compression
     # library from the .NET framework. (IronPython only)
-    import System
-    from System import IO, Array
+    import System  # type: ignore[import]
+    from System import IO, Array  # type: ignore[import]
 
     def _string_to_bytearr(buf):
         retval = Array.CreateInstance(System.Byte, len(buf))
@@ -536,7 +541,7 @@ def _xobj_to_image(x_object_obj):
     size = (x_object_obj[IA.WIDTH], x_object_obj[IA.HEIGHT])
     data = x_object_obj.getData()
     if x_object_obj[IA.COLOR_SPACE] == ColorSpaces.DEVICE_RGB:
-        mode = "RGB"
+        mode: Literal["RGB", "P"] = "RGB"
     else:
         mode = "P"
     extension = None

diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py
@@ -38,6 +38,7 @@
 import re
 import warnings
 from io import BytesIO
+from typing import Dict, Optional
 
 from PyPDF2.constants import FilterTypes as FT
 from PyPDF2.constants import StreamAttributes as SA
@@ -57,7 +58,13 @@
 IndirectPattern = re.compile(b_(r"[+-]?(\d+)\s+(\d+)\s+R[^a-zA-Z]"))
 
 
-def readObject(stream, pdf):
+class PdfObject:
+    def getObject(self):
+        """Resolve indirect references."""
+        return self
+
+
+def readObject(stream, pdf) -> PdfObject:
     tok = stream.read(1)
     stream.seek(-1, 1)  # reset to start
     idx = ObjectPrefix.find(tok)
@@ -101,12 +108,6 @@ def readObject(stream, pdf):
             return NumberObject.readFromStream(stream)
 
 
-class PdfObject:
-    def getObject(self):
-        """Resolve indirect references."""
-        return self
-
-
 class NullObject(PdfObject):
     def writeToStream(self, stream, encryption_key):
         stream.write(b_("null"))
@@ -508,7 +509,7 @@ def readFromStream(stream, pdf):
             # Name objects should represent irregular characters
             # with a '#' followed by the symbol's hex number
             if not pdf.strict:
-                warnings.warn("Illegal character in Name Object", utils.PdfReadWarning)
+                warnings.warn("Illegal character in Name Object", PdfReadWarning)
                 return NameObject(name)
             else:
                 raise PdfReadError("Illegal character in Name Object")
@@ -805,10 +806,18 @@ def emptyTree(self):
 
 class StreamObject(DictionaryObject):
     def __init__(self):
-        self._data = None
+        self.__data: Optional[str] = None
         self.decodedSelf = None
 
-    def writeToStream(self, stream, encryption_key):
+    @property
+    def _data(self):
+        return self.__data
+
+    @_data.setter
+    def _data(self, value):
+        self.__data = value
+
+    def writeToStream(self, stream, encryption_key) -> None:
         self[NameObject(SA.LENGTH)] = NumberObject(len(self._data))
         DictionaryObject.writeToStream(self, stream, encryption_key)
         del self[SA.LENGTH]
@@ -989,7 +998,8 @@ def _readInlineImage(self, stream):
                     data.write(tok)
         return {"settings": settings, "data": data.getvalue()}
 
-    def _getData(self):
+    @property
+    def _data(self):
         newdata = BytesIO()
         for operands, operator in self.operations:
             if operator == b_("INLINE IMAGE"):
@@ -1008,11 +1018,10 @@ def _getData(self):
             newdata.write(b_("\n"))
         return newdata.getvalue()
 
-    def _setData(self, value):
+    @_data.setter
+    def _data(self, value):
         self.__parseContentStream(BytesIO(b_(value)))
 
-    _data = property(_getData, _setData)
-
 
 class RectangleObject(ArrayObject):
     """
@@ -1686,7 +1695,7 @@ def decode_pdfdocencoding(byte_array):
 
 assert len(_pdfDocEncoding) == 256
 
-_pdfDocEncoding_rev = {}
+_pdfDocEncoding_rev: Dict[str, int] = {}
 for i in range(256):
     char = _pdfDocEncoding[i]
     if char == "\u0000":

diff --git a/PyPDF2/merger.py b/PyPDF2/merger.py
@@ -27,6 +27,7 @@
 
 from io import BytesIO
 from io import FileIO as file
+from typing import List, Optional, Union
 
 from PyPDF2._reader import PdfFileReader
 from PyPDF2._writer import PdfFileWriter
@@ -37,6 +38,8 @@
 
 StreamIO = BytesIO
 
+ERR_CLOSED_WRITER = "close() was called and thus the writer cannot be used anymore"
+
 
 class _MergedPage:
     """
@@ -68,7 +71,7 @@ class PdfFileMerger:
     def __init__(self, strict=False):
         self.inputs = []
         self.pages = []
-        self.output = PdfFileWriter()
+        self.output: Optional[PdfFileWriter] = PdfFileWriter()
         self.bookmarks = []
         self.named_dests = []
         self.id_count = 0
@@ -220,6 +223,8 @@ def write(self, fileobj):
         :param fileobj: Output file. Can be a filename or any kind of
             file-like object.
         """
+        if self.output is None:
+            raise RuntimeError(ERR_CLOSED_WRITER)
         my_file = False
         if isinstance(fileobj, str):
             fileobj = file(fileobj, "wb")
@@ -267,6 +272,8 @@ def addMetadata(self, infos):
             and each value is your new metadata.
             Example: ``{u'/Title': u'My title'}``
         """
+        if self.output is None:
+            raise RuntimeError(ERR_CLOSED_WRITER)
         self.output.addMetadata(infos)
 
     def setPageLayout(self, layout):
@@ -293,6 +300,8 @@ def setPageLayout(self, layout):
            * - /TwoPageRight
              - Show two pages at a time, odd-numbered pages on the right
         """
+        if self.output is None:
+            raise RuntimeError(ERR_CLOSED_WRITER)
         self.output.setPageLayout(layout)
 
     def setPageMode(self, mode):
@@ -317,6 +326,8 @@ def setPageMode(self, mode):
            * - /UseAttachments
              - Show attachments panel
         """
+        if self.output is None:
+            raise RuntimeError(ERR_CLOSED_WRITER)
         self.output.setPageMode(mode)
 
     def _trim_dests(self, pdf, dests, pages):
@@ -359,6 +370,8 @@ def _trim_outline(self, pdf, outline, pages):
         return new_outline
 
     def _write_dests(self):
+        if self.output is None:
+            raise RuntimeError(ERR_CLOSED_WRITER)
         for named_dest in self.named_dests:
             pageno = None
             if "/Page" in named_dest:
@@ -371,6 +384,8 @@ def _write_dests(self):
                 self.output.addNamedDestinationObject(named_dest)
 
     def _write_bookmarks(self, bookmarks=None, parent=None):
+        if self.output is None:
+            raise RuntimeError(ERR_CLOSED_WRITER)
         if bookmarks is None:
             bookmarks = self.bookmarks
 
@@ -533,13 +548,15 @@ def addBookmark(
         :param str fit: The fit of the destination page. See
             :meth:`addLink()<addLin>` for details.
         """
+        if self.output is None:
+            raise RuntimeError(ERR_CLOSED_WRITER)
         if len(self.output.getObject(self.output._pages)["/Kids"]) > 0:
             page_ref = self.output.getObject(self.output._pages)["/Kids"][pagenum]
         else:
             page_ref = self.output.getObject(self.output._pages)
 
         action = DictionaryObject()
-        zoom_args = []
+        zoom_args: List[Union[NumberObject, NullObject]] = []
         for a in args:
             if a is not None:
                 zoom_args.append(NumberObject(a))

diff --git a/PyPDF2/pagerange.py b/PyPDF2/pagerange.py
@@ -8,6 +8,7 @@
 """
 
 import re
+from typing import List, Tuple, Union
 
 from PyPDF2.errors import ParseError
 
@@ -96,6 +97,7 @@ def to_slice(self):
     def __str__(self):
         """A string like "1:2:3"."""
         s = self._slice
+        indices: Union[Tuple[int, int], Tuple[int, int, int]]
         if s.step is None:
             if s.start is not None and s.stop == s.start + 1:
                 return str(s.start)
@@ -133,7 +135,7 @@ def parse_filename_page_ranges(args):
     expressions, slice objects, or PageRange objects.
     A filename not followed by a page range indicates all pages of the file.
     """
-    pairs = []
+    pairs: List[Tuple[str, PageRange]] = []
     pdf_filename = None
     did_page_range = False
     for arg in args + [None]:

diff --git a/PyPDF2/py.typed b/PyPDF2/py.typed