# gen_test.py (from a fork of buriy/python-readability)
"""
This program facilitates the creation of a regression test case as used by the
test module. It uses the current readability algorithm to capture a benchmark
and construct a new test case.
"""

import argparse
import errno
import os
import os.path

import yaml

import readability
import readability.urlfetch

from regression_test import (
    TEST_DATA_PATH,
    READABLE_SUFFIX,
    YAML_EXTENSION,
    read_yaml,
)

OVERWRITE_QUESTION = '%s exists; overwrite and continue (y/n)? '

def y_or_n(question):
    """Prompt until a non-empty answer is given; True iff it starts with y/Y."""
    while True:
        response = input(question).strip()
        if len(response) > 0:
            return response[0] in ['y', 'Y']

def write_file(path, data):
    mode = 0o644
    try:
        # Create the file exclusively so an existing one is never silently
        # clobbered; on EEXIST, ask the user before truncating it.
        fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, mode)
    except OSError as e:
        if e.errno == errno.EEXIST:
            if y_or_n(OVERWRITE_QUESTION % path):
                fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, mode)
            else:
                return False
        else:
            raise
    with os.fdopen(fd, 'w') as f:
        f.write(data)
    return True

def make_readable_path(base_path, url_map, url):
# We put the readable version of the page next to the original so that all
# of the relative links work when we open it in a browser.
rel_path = url_map[url]
path = os.path.join(base_path, rel_path)
return ''.join([path, READABLE_SUFFIX])
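
# write_readable captures the benchmark: it fetches the page through the given
# fetcher (which also records each fetched URL in url_map), runs the
# readability algorithm over it, and stores the result next to the original
# copy. For example, if url_map maps the URL to 'article/index.html' and
# READABLE_SUFFIX is '.rdbl' (both illustrative), the benchmark lands at
# <base_path>/article/index.html.rdbl.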
def write_readable(base_path, fetcher, url_map, url):
orig = fetcher.urlread(url)
options = {'url': url, 'urlfetch': fetcher}
rdbl_doc = readability.Document(orig, **options)
summary = rdbl_doc.summary()
path = make_readable_path(base_path, url_map, url)
return write_file(path, summary.html)

def read_spec(test_name):
    yaml_path = os.path.join(TEST_DATA_PATH, test_name + YAML_EXTENSION)
    return read_yaml(yaml_path)
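
# A spec is a small YAML document; a generated one looks roughly like this
# (the paths and values are illustrative):
#
#   test_description: extracts the article body from example.com
#   url: http://example.com/article
#   url_map:
#     http://example.com/article: article/index.html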
def write_spec(base_path, spec):
    spec_yaml = yaml.dump(spec, default_flow_style=False)
    path = base_path + YAML_EXTENSION
    return write_file(path, spec_yaml)

def maybe_mkdir(path):
    # Create the directory if needed; an existing one is not an error.
    try:
        os.mkdir(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

def create(args):
spec = {'url': args.url, 'test_description': args.test_description}
# We retrieve the page and all of its prerequisites so that it can be
# displayed fully locally. base_path is the path to the directory that
# holds the structure of the site(s) necessary for the prerequisites.
base_path = os.path.join(TEST_DATA_PATH, args.test_name)
maybe_mkdir(base_path)
url_map = dict()
fetcher = readability.urlfetch.LocalCopyUrlFetch(base_path, url_map)
if not write_readable(base_path, fetcher, url_map, args.url):
return False
spec['url_map'] = url_map
if not write_spec(base_path, spec):
return False
return True
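
# After a successful create, the on-disk layout is roughly as follows (names
# illustrative, since TEST_DATA_PATH, READABLE_SUFFIX, and YAML_EXTENSION are
# defined in regression_test):
#
#   <TEST_DATA_PATH>/<test_name><YAML_EXTENSION>       the spec
#   <TEST_DATA_PATH>/<test_name>/...                   fetched page and prerequisites
#   <TEST_DATA_PATH>/<test_name>/...<READABLE_SUFFIX>  the captured benchmark
#
# genbench rereads the spec and regenerates the benchmark from these files.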
def genbench(args):
spec = read_spec(args.test_name)
url = spec['url']
base_path = os.path.join(TEST_DATA_PATH, args.test_name)
if args.refetch:
url_map = dict()
fetcher = readability.urlfetch.LocalCopyUrlFetch(base_path, url_map)
else:
url_map = spec['url_map']
fetcher = readability.urlfetch.MockUrlFetch(base_path, url_map)
if not write_readable(base_path, fetcher, url_map, url):
return False
if args.refetch:
# We potentially refetched different pages than the existing test, so
# we have to update the spec accordingly.
spec['url_map'] = url_map
if not write_spec(base_path, spec):
return False
return True

DESCRIPTION = 'Create a readability regression test case.'

def main():
    parser = argparse.ArgumentParser(description=DESCRIPTION)
    # Require a subcommand so that args.func is always set.
    subparsers = parser.add_subparsers(dest='command', required=True,
                                       help='available subcommands')
    parser_create = subparsers.add_parser(
        'create',
        help='create an entirely new test'
    )
    parser_create.add_argument(
        'url',
        metavar='url',
        help='the URL for which to generate a test'
    )
    parser_create.add_argument(
        'test_name',
        metavar='test-name',
        help='the name of the test'
    )
    parser_create.add_argument(
        'test_description',
        metavar='test-description',
        help='the description of the test'
    )
    parser_create.set_defaults(func=create)
    parser_genbench = subparsers.add_parser(
        'genbench',
        help='regenerate the benchmark for an existing test'
    )
    parser_genbench.add_argument(
        'test_name',
        metavar='test-name',
        help='the name of the test'
    )
    parser_genbench.add_argument(
        '--refetch',
        dest='refetch',
        action='store_true',
        help='if set, the original HTML is refetched from the URL'
    )
    parser_genbench.set_defaults(func=genbench)
    args = parser.parse_args()
    result = args.func(args)
    if not result:
        print('test was not fully generated')

if __name__ == '__main__':
    main()