-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathremover.py
75 lines (55 loc) · 2.83 KB
/
remover.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import argparse
from modules.pdf_image_extractor import PDFImageExtractor
from modules.controller.mask_drawing_controller import MaskDrawing
from modules.controller.mask_erosion_dilation_controller import MaskErosionDilation
from modules.controller.mask_thresholding_controller import MaskThresholding
from modules.controller.mask_selector_controller import MaskSelector
from modules.controller.parameter_adjuster_controller import ParameterAdjuster
from modules.watermark_remover import WatermarkRemover
# Global variables
# Default input/output locations, used when the positional command-line
# arguments are omitted.
PDF_PATH = 'input.pdf'
SAVE_PATH = 'output.pdf'
# Set the maximum width and height for the images during the masking process
# The final PDF will have the same dimensions as the original PDF
MAX_WIDTH = 1400
MAX_HEIGHT = 800
# Set the DPI for the images, this affects the quality of the final PDF
DPI = 200


def parse_args(argv=None):
    """Parse the command-line options of the watermark remover.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When ``None`` (the default) argparse reads
            ``sys.argv[1:]``, which is the behaviour existing callers rely on.

    Returns:
        An ``argparse.Namespace`` with the attributes ``pdf_path``,
        ``save_path``, ``dpi``, ``max_width`` and ``max_height``. Every option
        falls back to the matching module-level constant when not supplied.

    Raises:
        SystemExit: Raised by argparse after printing help (``-h``/``--help``)
            or when the arguments are invalid.
    """
    parser = argparse.ArgumentParser(description='Remove watermark from PDF.')

    # Both paths are optional positionals (nargs='?') so the script can be
    # launched without any arguments at all.
    parser.add_argument('pdf_path', type=str, nargs='?', default=PDF_PATH,
                        help='Path to the input PDF file.')
    parser.add_argument('save_path', type=str, nargs='?', default=SAVE_PATH,
                        help='Path to save the output PDF file.')

    # The help text is built from the constant so the advertised default can
    # never drift from the real one (it previously claimed 300 while DPI is 200).
    parser.add_argument('--dpi', type=int, default=DPI,
                        help=f'DPI for the images. Default is {DPI}.')
    parser.add_argument('--max_width', type=int, default=MAX_WIDTH,
                        help=f'Maximum width for the images. Default is {MAX_WIDTH}.')
    parser.add_argument('--max_height', type=int, default=MAX_HEIGHT,
                        help=f'Maximum height for the images. Default is {MAX_HEIGHT}.')

    return parser.parse_args(argv)
def main():
    """Run the watermark-removal pipeline from the command line.

    The steps run strictly in sequence: parse the arguments, extract the page
    images from the input PDF, build a mask through the selector,
    thresholding, drawing and erosion/dilation controllers, choose the colour
    parameters, then remove the watermark and write the output PDF. Each
    controller's ``run()`` is called before its result is read.
    """
    options = parse_args()

    # Step 1: pull the page images out of the PDF, one set for building the
    # mask and one set for the actual watermark removal.
    extractor = PDFImageExtractor(
        options.pdf_path,
        options.dpi,
        options.max_width,
        options.max_height,
    )
    mask_images = extractor.get_images_for_mask_making()
    removal_images = extractor.get_images_for_watermark_removal()

    # Step 2: initial mask selection.
    mask_selector = MaskSelector(mask_images)
    mask_selector.run()
    selected_mask = mask_selector.get_gray_mask()

    # Step 3: threshold the selected mask.
    thresholder = MaskThresholding(mask_images, selected_mask)
    thresholder.run()
    thresholded_mask = thresholder.get_gray_mask()

    # Step 4: draw on top of the thresholded mask.
    drawer = MaskDrawing(thresholded_mask)
    drawer.run()
    drawn_mask = drawer.get_gray_mask()

    # Step 5: erode/dilate, then take the BGR form used by the later stages.
    morphology = MaskErosionDilation(drawn_mask)
    morphology.run()
    final_mask = morphology.get_bgr_mask()

    # Step 6: pick the colour range that will be filtered/removed.
    adjuster = ParameterAdjuster(mask_images, final_mask)
    adjuster.run()
    removal_parameters = adjuster.get_parameters()

    # Step 7: strip the watermark and save the resulting PDF.
    watermark_remover = WatermarkRemover(removal_images, final_mask, removal_parameters)
    watermark_remover.remove_watermark()
    watermark_remover.save_pdf(options.save_path)
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()