-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdfImgExtract.py
32 lines (24 loc) · 1 KB
/
pdfImgExtract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!python3.9
import argparse
from os import makedirs, path
import fitz # PyMuPDF
# Command-line interface: a single optional positional argument naming the
# PDF to read; when it is omitted the script falls back to "a.pdf".
parser = argparse.ArgumentParser(
    description="Extracts images from pdf file",
)
parser.add_argument(
    "input",
    nargs="?",
    default="a.pdf",
    help="Input file.",
)
def get_pixmaps_in_pdf(pdf_filename):
    """Return one pixmap for every distinct image referenced in a PDF.

    Args:
        pdf_filename: Path of the PDF file to open.

    Returns:
        A list of ``fitz.Pixmap`` objects, one per unique image XREF, in the
        order the XREFs are first encountered while walking the pages.

    Raises:
        Whatever ``fitz.open`` or ``fitz.Pixmap`` raise (e.g. for a missing
        or unreadable file); the document is closed before the error
        propagates.
    """
    doc = fitz.open(pdf_filename)
    try:
        # PyMuPDF renamed getPageImageList to get_page_images; use whichever
        # name this installation provides so old and new releases both work.
        list_page_images = getattr(doc, "get_page_images", None)
        if list_page_images is None:
            list_page_images = doc.getPageImageList

        # A dict serves as an insertion-ordered set: an image shared by
        # several pages is kept once, and the result follows document order.
        xrefs = {}
        for page_index in range(len(doc)):  # len(doc) is the page count
            for image in list_page_images(page_index):
                xrefs.setdefault(image[0], None)  # item 0 is the XREF

        # Pixmaps must be built while the document is still open.
        return [fitz.Pixmap(doc, xref) for xref in xrefs]
    finally:
        # Release the document even if listing or decoding an image failed.
        doc.close()
def write_pixmaps_to_pngs(pixmaps, output_dir):
    """Save each pixmap as ``<index>.png`` inside ``output_dir``.

    Args:
        pixmaps: Iterable of pixmap objects; files are numbered from 0 in
            iteration order.
        output_dir: Directory to write into. It is not created here.
    """
    for i, pixmap in enumerate(pixmaps):
        # Build the path with the platform's separator; a literal backslash
        # would become part of the file name on non-Windows systems.
        target = path.join(output_dir, f"{i}.png")

        # Newer PyMuPDF releases expose Pixmap.save; older ones only have
        # the legacy writePNG. Prefer the current name, fall back otherwise.
        save = getattr(pixmap, "save", None)
        if save is None:
            save = pixmap.writePNG
        save(target)
if __name__ == "__main__":
    cli_args = parser.parse_args()

    # The output folder is named after the PDF: strip the extension first,
    # then drop any leading directories so it lands in the working directory.
    stem, _extension = path.splitext(cli_args.input)
    output_dir = path.basename(stem)
    makedirs(output_dir, exist_ok=True)

    # Extract every image from the PDF and dump them as numbered PNG files.
    write_pixmaps_to_pngs(get_pixmaps_in_pdf(cli_args.input), output_dir)