-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathutils.py
134 lines (106 loc) · 3.78 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import io
import os
import base64
import logging
import numpy as np
from PIL import Image
from config import *
# Names exported by a star-import of this module.
__all__ = ["text2vector", "vector2text", "img2array",
"next_train_batch", "next_train_text_and_image", "next_test_text_and_image",
"samples_dir", "checkpoints_dir", "InvalidCaptchaError"]
# Training captchas: path of the sample file in the "samples" directory next to this module.
TRAIN_CAPTCHA = os.path.join(os.path.dirname(__file__), "samples", "train_mixed_captcha_base64.txt")
# Test captchas: path of the sample file in the "samples" directory next to this module.
TEST_CAPTCHA = os.path.join(os.path.dirname(__file__), "samples", "test_mixed_captcha_base64.txt")
def text2vector(text, captcha_len=None, char_set_len=None):
    """Encode captcha text as a flat one-hot vector.

    The result has ``captcha_len * char_set_len`` entries. Character ``i`` of
    ``text`` sets index ``i * char_set_len + pos``, where ``pos`` is 0-9 for
    ``'0'``-``'9'``, 10-35 for ``'A'``-``'Z'`` and 36-61 for ``'a'``-``'z'``.

    :param text: captcha text, at most ``captcha_len`` characters
    :param captcha_len: number of character slots; defaults to ``CAPTCHA_LEN``
    :param char_set_len: width of one slot; defaults to ``CHAR_SET_LEN``
    :return: 1-D numpy array of zeros with a 1 for every encoded character
    :raises ValueError: if ``text`` is too long, or contains a character that
        has no position inside a slot
    """
    if captcha_len is None:
        captcha_len = CAPTCHA_LEN
    if char_set_len is None:
        char_set_len = CHAR_SET_LEN
    if len(text) > captcha_len:
        raise ValueError("Max captcha is %d chars!" % captcha_len)

    def char2pos(c):
        # Explicit range checks: pure ord() arithmetic silently maps
        # characters such as ':' or '[' onto valid-looking positions and
        # yields negative indices for anything below '0'.
        if "0" <= c <= "9":
            pos = ord(c) - ord("0")
        elif "A" <= c <= "Z":
            pos = ord(c) - ord("A") + 10
        elif "a" <= c <= "z":
            pos = ord(c) - ord("a") + 36
        else:
            raise ValueError("No map!")
        # A position beyond the slot width would spill into the next slot.
        if pos >= char_set_len:
            raise ValueError("No map!")
        return pos

    vector = np.zeros(char_set_len * captcha_len)
    for i, c in enumerate(text):
        vector[i * char_set_len + char2pos(c)] = 1
    return vector
def vector2text(vec, char_set_len=None):
    """Decode a one-hot captcha vector back into text.

    Every non-zero index is reduced modulo ``char_set_len`` to a position
    inside its slot: 0-9 map to ``'0'``-``'9'``, 10-35 to ``'A'``-``'Z'``
    and 36-61 to ``'a'``-``'z'``.

    :param vec: 1-D array-like (numpy array or plain sequence)
    :param char_set_len: width of one slot; defaults to ``CHAR_SET_LEN``
    :return: decoded text, one character per non-zero entry, in index order
    :raises ValueError: if a position inside a slot is 62 or greater
    """
    if char_set_len is None:
        char_set_len = CHAR_SET_LEN
    # asarray lets plain lists/tuples through; ndarrays are used unchanged.
    positions = np.asarray(vec).nonzero()[0]
    chars = []
    for pos in positions:
        char_idx = int(pos) % char_set_len
        if char_idx < 10:
            char_code = char_idx + ord('0')
        elif char_idx < 36:
            char_code = char_idx - 10 + ord('A')
        elif char_idx < 62:
            char_code = char_idx - 36 + ord('a')
        else:
            raise ValueError('error')
        chars.append(chr(char_code))
    return "".join(chars)
class InvalidCaptchaError(OSError):
    """Raised when captcha image data cannot be loaded as an image."""
def img2array(image):
    """Convert a captcha image into a flat array scaled by 1/255.

    :param image: a base64-encoded image as ``str`` (decoded, opened with
        PIL and converted to mode ``"L"``), or any object ``np.array``
        accepts, which is used as-is
    :return: 1-D numpy array of the pixel values divided by 255
    :raises InvalidCaptchaError: if the base64 payload or the image data
        cannot be decoded
    """
    # Local import: only needed to name the exception base64 raises.
    import binascii

    try:
        if isinstance(image, str):
            img_bytes = base64.decodebytes(image.encode("utf8"))
            image = Image.open(io.BytesIO(img_bytes)).convert("L")
        return np.array(image).flatten() / 255
    except (OSError, binascii.Error) as err:
        # binascii.Error (e.g. bad base64 padding) is a ValueError, not an
        # OSError; without catching it a corrupt line would escape callers
        # that skip samples on InvalidCaptchaError.
        raise InvalidCaptchaError() from err
# Module-level handles on the sample files, opened when this module is imported.
# next_train_text_and_image() / next_test_text_and_image() read one line per
# call from these handles and reopen the file once it is exhausted.
_train_captcha = open(TRAIN_CAPTCHA, "r")
_test_captcha = open(TEST_CAPTCHA, "r")
def next_train_text_and_image():
    """Return the next usable ``(text, image)`` training sample.

    Reads one line at a time from the module-level training file handle. The
    last two ``:``-separated fields of a line are the captcha text and the
    base64 image, which is converted with ``img2array``. When the file is
    exhausted it is reopened and reading continues from the start. Lines
    that are malformed or whose image cannot be decoded are logged and
    skipped.

    :return: tuple ``(text, image)``
    :raises RuntimeError: if a complete pass over the file yields no usable
        sample (empty file, or only invalid entries)
    """
    global _train_captcha
    # Iterate instead of recursing: a run of bad lines or an empty file would
    # otherwise exhaust the interpreter's recursion limit.
    rewound = False
    while True:
        try:
            line = next(_train_captcha)
        except StopIteration:
            if rewound:
                # Already restarted once during this call and still found nothing.
                raise RuntimeError("No valid captcha found in %s" % TRAIN_CAPTCHA) from None
            _train_captcha.close()
            _train_captcha = open(TRAIN_CAPTCHA, "r")
            logging.warning("Not enough captcha! Loop reading lines from same file!")
            rewound = True
            continue

        fields = line.split(":")
        if len(fields) < 2:
            # e.g. a blank line: nothing to unpack, skip it like any other bad sample.
            logging.warning("Malformed captcha line! Next train text and image!")
            continue
        text, img_base64 = fields[-2:]

        try:
            image = img2array(img_base64)
        except InvalidCaptchaError:
            logging.warning("Invalid captcha error! Next train text and image!", exc_info=True)
            continue
        return text, image
def next_test_text_and_image():
    """Return the next usable ``(text, image)`` test sample.

    Reads one line at a time from the module-level test file handle. The
    last two ``:``-separated fields of a line are the captcha text and the
    base64 image (any leading field, such as a class label, is ignored),
    matching how training lines are parsed. When the file is exhausted it is
    reopened and reading continues from the start. Lines that are malformed
    or whose image cannot be decoded are logged and skipped.

    :return: tuple ``(text, image)``
    :raises RuntimeError: if a complete pass over the file yields no usable
        sample (empty file, or only invalid entries)
    """
    global _test_captcha
    # Iterate instead of recursing: a run of bad lines or an empty file would
    # otherwise exhaust the interpreter's recursion limit.
    rewound = False
    while True:
        try:
            line = next(_test_captcha)
        except StopIteration:
            if rewound:
                # Already restarted once during this call and still found nothing.
                raise RuntimeError("No valid captcha found in %s" % TEST_CAPTCHA) from None
            _test_captcha.close()
            _test_captcha = open(TEST_CAPTCHA, "r")
            logging.warning("Not enough test captcha! Loop reading lines from same file!")
            rewound = True
            continue

        fields = line.split(":")
        if len(fields) < 2:
            # e.g. a blank line: nothing to unpack, skip it like any other bad sample.
            logging.warning("Malformed captcha line! Next test text and image!")
            continue
        text, img_base64 = fields[-2:]

        try:
            image = img2array(img_base64)
        except InvalidCaptchaError:
            logging.warning("Invalid captcha error! Next test text and image!", exc_info=True)
            continue
        return text, image
def next_train_batch(batch_size=64):
    """
    Assemble one batch of training samples.

    Each row is filled from ``next_train_text_and_image()``: the image array
    goes into the first result and the ``text2vector`` encoding of its text
    into the second.

    :param batch_size: number of samples in the batch, default 64
    :return: tuple ``(xs, ys)`` of arrays shaped
             ``[batch_size, IMG_WIDTH * IMG_HEIGHT]`` and
             ``[batch_size, CAPTCHA_LEN * CHAR_SET_LEN]``
    """
    images = np.zeros((batch_size, IMG_WIDTH * IMG_HEIGHT))
    labels = np.zeros((batch_size, CAPTCHA_LEN * CHAR_SET_LEN))
    # Iterating a 2-D array yields row views, so writing through them fills
    # the batch arrays in place.
    for image_row, label_row in zip(images, labels):
        captcha_text, pixels = next_train_text_and_image()
        image_row[:] = pixels
        label_row[:] = text2vector(captcha_text)
    return images, labels
def checkpoints_dir():
    """Return the path of the ``train/checkpoints`` directory next to this module."""
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, "train", "checkpoints")
def samples_dir():
    """Return the path of the ``samples`` directory next to this module."""
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, "samples")