Skip to content

Commit

Permalink
Merge pull request #1166 from myhloli/dev
Browse files (browse the repository at this point in the history)
fix(pre_proc): prevent errors when imageWriter is None
  • Loading branch information
myhloli authored Dec 2, 2024
2 parents 384e037 + b0529b6 commit a7296f7
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
4 changes: 2 additions & 2 deletions magic_pdf/data/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -20,8 +20,8 @@ def fitz_doc_to_image(doc, dpi=200) -> dict:
mat = fitz.Matrix(dpi / 72, dpi / 72)
pm = doc.get_pixmap(matrix=mat, alpha=False)

- # If the width or height exceeds 9000 after scaling, do not scale further.
- if pm.width > 9000 or pm.height > 9000:
+ # If the width or height exceeds 4500 after scaling, do not scale further.
+ if pm.width > 4500 or pm.height > 4500:
pm = doc.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)

img = Image.frombytes('RGB', (pm.width, pm.height), pm.samples)
Expand Down
4 changes: 2 additions & 2 deletions magic_pdf/pre_proc/cut_image.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,12 +12,12 @@ def return_path(type):
for span in spans:
span_type = span['type']
if span_type == ContentType.Image:
- if not check_img_bbox(span['bbox']):
+ if not check_img_bbox(span['bbox']) or not imageWriter:
continue
span['image_path'] = cut_image(span['bbox'], page_id, page, return_path=return_path('images'),
imageWriter=imageWriter)
elif span_type == ContentType.Table:
- if not check_img_bbox(span['bbox']):
+ if not check_img_bbox(span['bbox']) or not imageWriter:
continue
span['image_path'] = cut_image(span['bbox'], page_id, page, return_path=return_path('tables'),
imageWriter=imageWriter)
Expand Down

0 comments on commit a7296f7

Please sign in to comment.