Skip to content

Commit

Permalink
[chore]: Pypdfium2 compatibility fix (#1239)
Browse files Browse the repository at this point in the history
  • Loading branch information
felixT2K authored Jul 10, 2023
1 parent e04e183 commit 4e1985f
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 10 deletions.
11 changes: 3 additions & 8 deletions doctr/io/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

- from pathlib import Path
from typing import Any, List, Optional

import numpy as np
Expand Down Expand Up @@ -31,16 +30,12 @@ def read_pdf(
scale: rendering scale (1 corresponds to 72dpi)
rgb_mode: if True, the output will be RGB, otherwise BGR
password: a password to unlock the document, if encrypted
- kwargs: additional parameters to :meth:`pypdfium2.PdfDocument.render_to`
+ kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
Returns:
the list of pages decoded as numpy ndarray of shape H x W x C
"""

- if isinstance(file, Path):
- file = str(file)
-
# Rasterise pages to numpy ndarrays with pypdfium2
- pdf = pdfium.PdfDocument(file, password=password)
- renderer = pdf.render_to(pdfium.BitmapConv.numpy_ndarray, scale=scale, rev_byteorder=rgb_mode, **kwargs)
- return [img for img, _ in renderer]
+ pdf = pdfium.PdfDocument(file, password=password, autoclose=True)
+ return [page.render(scale=scale, rev_byteorder=rgb_mode, **kwargs).to_numpy() for page in pdf]
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ dependencies = [
"scipy>=1.4.0,<2.0.0",
"h5py>=3.1.0,<4.0.0",
"opencv-python>=4.5.0,<5.0.0",
- "pypdfium2>=3.3.0,<4.0.0",
+ "pypdfium2>=4.0.0,<5.0.0",
"pyclipper>=1.2.0,<2.0.0",
"shapely>=1.6.0,<3.0.0",
"langdetect>=1.0.9,<2.0.0",
Expand Down
2 changes: 1 addition & 1 deletion tests/common/test_utils_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def test_extract_rcrops(mock_pdf): # noqa: F811
abs_boxes = deepcopy(rel_boxes)
abs_boxes[:, :, 0] *= doc_img.shape[1]
abs_boxes[:, :, 1] *= doc_img.shape[0]
- abs_boxes = abs_boxes.astype(np.int)
+ abs_boxes = abs_boxes.astype(np.int64)

with pytest.raises(AssertionError):
geometry.extract_rcrops(doc_img, np.zeros((1, 8)))
Expand Down

0 comments on commit 4e1985f

Please sign in to comment.