-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpreprocess_images.py
76 lines (62 loc) · 2.74 KB
/
preprocess_images.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import numpy as np
from PIL import Image
import warnings
from multiprocessing import Pool
from tqdm import tqdm
import cv2
def trim(im):
    """
    Crop away the dark border surrounding the content of an image.

    The image is converted to grayscale and a binary mask is computed of the
    pixels brighter than 10% of the mean of the non-zero pixels. A row is
    treated as content when more than a certain percentage (2%) of the image
    width is above the threshold in that row; likewise for columns against
    the image height. The crop spans the first to the last content row and
    column, inclusive.

    Args:
        im: A PIL image (anything ``np.array`` can convert). Colour data is
            interpreted in PIL's RGB / RGBA channel order.

    Returns:
        A new PIL image with the cropped region. If the image holds no
        content (all pixels are zero, or no row/column passes the percentage
        test) the image is returned uncropped.
    """
    percentage = 0.02
    img = np.array(im)

    # PIL delivers RGB(A) channel order, so use the RGB conversion codes;
    # single-channel input is already a luminance map.
    if img.ndim == 2:
        img_gray = img
    elif img.shape[2] == 4:
        img_gray = cv2.cvtColor(img, cv2.COLOR_RGBA2GRAY)
    elif img.shape[2] == 3:
        img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        img_gray = img[..., 0]

    lit_pixels = img_gray[img_gray != 0]
    if lit_pixels.size == 0:
        # Completely black image: there is nothing to anchor the crop on.
        return Image.fromarray(img)

    mask = img_gray > 0.1 * np.mean(lit_pixels)
    row_sums = np.sum(mask, axis=1)
    col_sums = np.sum(mask, axis=0)
    rows = np.flatnonzero(row_sums > img.shape[1] * percentage)
    cols = np.flatnonzero(col_sums > img.shape[0] * percentage)
    if rows.size == 0 or cols.size == 0:
        # Bright pixels are too sparse for any row/column to qualify.
        return Image.fromarray(img)

    # flatnonzero returns sorted indices, so the ends are the min and max.
    im_crop = img[rows[0] : rows[-1] + 1, cols[0] : cols[-1] + 1]
    return Image.fromarray(im_crop)
def resize_maintain_aspect(image, desired_size):
    """
    Fit an image inside a square canvas without distorting it.

    The image is scaled so that its longest side equals ``desired_size``
    (up to integer truncation) and is then pasted centred onto a new black
    RGB canvas, so the shorter side is padded rather than stretched.

    Args:
        image: PIL image to resize.
        desired_size: Side length, in pixels, of the square output.

    Returns:
        A new ``desired_size`` x ``desired_size`` RGB PIL image.
    """
    old_size = image.size  # (width, height)
    ratio = float(desired_size) / max(old_size)
    # Clamp to 1 px so an extreme aspect ratio cannot truncate a side to 0,
    # which PIL's resize rejects.
    new_size = tuple(max(1, int(side * ratio)) for side in old_size)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under its current name.
    resized = image.resize(new_size, Image.LANCZOS)
    canvas = Image.new("RGB", (desired_size, desired_size))
    offset = (
        (desired_size - new_size[0]) // 2,
        (desired_size - new_size[1]) // 2,
    )
    canvas.paste(resized, offset)
    return canvas
def save_single(args):
    """
    Trim, resize and save a single image.

    Takes one packed tuple so it can be mapped over a process pool.

    Args:
        args: Tuple ``(img_file, input_path_folder, output_path_folder,
            output_size)``. ``output_size`` is either a tuple/list whose
            first element is the side length of the square output, or the
            side length itself.
    """
    img_file, input_path_folder, output_path_folder, output_size = args
    if isinstance(output_size, (tuple, list)):
        side = output_size[0]
    else:
        side = output_size

    # The context manager closes the file handle; trim() copies the pixel
    # data into a new image before the handle is released.
    with Image.open(os.path.join(input_path_folder, img_file)) as image_original:
        image = trim(image_original)
    image = resize_maintain_aspect(image, desired_size=side)
    # Join the folder and file name as separate components so the path is
    # correct whether or not the folder ends with a separator.
    image.save(os.path.join(output_path_folder, img_file))
def fast_image_resize(input_path_folder, output_path_folder, output_size=None):
    """
    Trim and resize every image file in a folder using a process pool.

    Each file directly inside ``input_path_folder`` is handed to
    ``save_single`` in a worker process and written under the same file name
    to ``output_path_folder``, which is created if it does not exist. A
    progress bar tracks completed files.

    Args:
        input_path_folder: Folder containing the source images.
        output_path_folder: Folder that receives the processed images.
        output_size: Side length of the square output, either as an int
            (``100``) or as a pair (``(100, 100)``).

    Raises:
        ValueError: If ``output_size`` is missing or zero.
    """
    if not output_size:
        # Raise instead of warn + exit(): exit() is an interactive helper
        # and would end the process with a success status.
        raise ValueError("Need to specify output_size! For example: output_size=100")
    if isinstance(output_size, int):
        # Workers index into output_size, so promote a bare int to a pair.
        output_size = (output_size, output_size)

    os.makedirs(output_path_folder, exist_ok=True)

    # Sub-directories cannot be opened as images, so only queue regular files.
    jobs = [
        (file_name, input_path_folder, output_path_folder, output_size)
        for file_name in os.listdir(input_path_folder)
        if os.path.isfile(os.path.join(input_path_folder, file_name))
    ]
    with Pool() as p:
        # Drain the iterator to drive the work and advance the progress bar.
        for _ in tqdm(p.imap_unordered(save_single, jobs), total=len(jobs)):
            pass
if __name__ == "__main__":
    # Resize the train images first, then the test images, to 150x150.
    folder_pairs = (
        ("../train/images/", "../train/images_resized_150/"),
        ("../test/images/", "../test/images_resized_150/"),
    )
    for source_dir, target_dir in folder_pairs:
        fast_image_resize(source_dir, target_dir, output_size=(150, 150))