-
Notifications
You must be signed in to change notification settings - Fork 36
/
Copy pathgenattrs.py
executable file
·100 lines (85 loc) · 3.39 KB
/
genattrs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: Apache 2.0 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import re
import subprocess
from lxml import html
# Absolute location of this script; main() changes into its directory so the
# relative "src/..." output paths resolve next to the script.
self_path = os.path.abspath(__file__)

# Banner placed at the top of every generated C header.
HEADER = (
    '// Do not edit\n'
    '// Generated by genattrs.py\n'
)
def generate_attr_headers(attrs):
    """Write the attribute string, enum and size tables as C header fragments.

    Creates (or truncates) ``src/attr_strings.h``, ``src/attr_enum.h`` and
    ``src/attr_sizes.h`` relative to the current working directory. Every
    file starts with HEADER. Then, for each name in *attrs*, in order:

    * ``attr_strings.h`` gets the quoted name followed by a comma, one per line;
    * ``attr_enum.h`` gets ``HTML_ATTR_<NAME>,`` where ``<NAME>`` is the
      upper-cased name with ``-`` and ``:`` replaced by ``_``;
    * ``attr_sizes.h`` gets the character length of the name followed by
      ``", "``, all on a single line that is terminated once at the end.

    :param attrs: iterable of attribute name strings.
    """
    def emit(out, text):
        # The files are opened in binary mode, so encode explicitly.
        out.write(text.encode('utf-8'))

    with open("src/attr_strings.h", "wb") as strings_h, \
            open("src/attr_enum.h", "wb") as enum_h, \
            open("src/attr_sizes.h", "wb") as sizes_h:
        for out in (strings_h, enum_h, sizes_h):
            emit(out, HEADER)
        for name in attrs:
            enum_suffix = name.upper().replace('-', '_').replace(':', '_')
            emit(strings_h, '"%s",\n' % name)
            emit(enum_h, 'HTML_ATTR_%s,\n' % enum_suffix)
            emit(sizes_h, '%d, ' % len(name))
        # All sizes share one line; close it after the last entry.
        emit(sizes_h, '\n')
def generate_attr_perfect_hash(attrs, repetitions=400):
    """Generate ``src/attr_perf.h`` from a gperf perfect hash of *attrs*.

    Runs the external ``gperf`` program with the attribute names (one per
    line) on its standard input and post-processes the ANSI-C output:

    1. The output is split just before the line preceding ``in_word_set``.
       The first part is kept as code; the second part is only mined for the
       contents of ``wordlist[]``.
    2. Each ``wordlist`` entry is turned into an ``HTML_ATTR_*`` enum name
       (empty slots become ``HTML_ATTR_LAST``) and emitted as the
       ``HTML_ATTR_MAP`` array.
    3. Everything up to and including the last line that begins with
       ``attr_hash`` is replaced by HEADER plus a ``static inline`` prototype
       line for ``attr_hash``.

    :param attrs: iterable of attribute name strings.
    :param repetitions: value passed to gperf's ``-m`` option.
    :raises SystemExit: if gperf exits with a non-zero status, or if
        ``in_word_set`` / ``wordlist`` cannot be located in its output.
    """
    p = subprocess.Popen(
        'gperf -LANSI-C -H attr_hash -m{} /dev/stdin'.format(repetitions).split(),
        stdout=subprocess.PIPE,
        stdin=subprocess.PIPE)
    stdout = p.communicate('\n'.join(attrs).encode('utf-8'))[0]
    if p.wait() != 0:
        raise SystemExit(p.returncode)
    raw = stdout.decode('utf-8').splitlines()
    for i, line in enumerate(raw):
        if line.startswith('in_word_set'):
            break
    else:
        raise SystemExit('Failed to find in_word_set()')
    # Cut one line above in_word_set so the line preceding it is dropped
    # from the kept code as well. Clamp at 0 so a match on the very first
    # line cannot turn into a negative slice index.
    cut = max(i - 1, 0)
    lines = raw[:cut]
    del raw[:cut]
    raw = '\n'.join(raw)
    # Raw string: the same pattern as before, but without relying on invalid
    # escape sequences (\[ and \s) inside a normal string literal.
    wordlist = re.search(r"wordlist\[\]\s+=\s+{(.*?)}", raw, re.DOTALL)
    if wordlist is None:
        raise SystemExit('Failed to find wordlist')
    wordlist = [w.strip().replace('"', '') for w in wordlist.group(1).split(',')]
    # Empty hash-table slots map to the HTML_ATTR_LAST sentinel.
    attrlist = [
        "\tHTML_ATTR_" + (w.upper().replace('-', '_').replace(':', '_') if w else 'LAST')
        for w in wordlist]
    processed = '\n'.join(lines) + '\n\n'
    processed += 'static const HTMLAttr HTML_ATTR_MAP[] = {\n%s\n};' % '\n,'.join(attrlist)
    # Greedy match with DOTALL: swallows everything from the start of the text
    # through the last line-initial "attr_hash".
    processed = re.sub(
        r'.+^attr_hash',
        HEADER + 'static inline unsigned int\nattr_hash',
        processed,
        flags=re.DOTALL | re.MULTILINE)
    with open('src/attr_perf.h', 'wb') as f:
        f.write(processed.encode('utf-8'))
        f.write(b'\n')
def get_attr_names(html_path='/t/Attributes', svg_path='/t/Attribute'):
    """Yield attribute names scraped from locally saved MDN reference pages.

    HTML attribute names are taken from the first ``<code>`` element in the
    first cell of each row of the ``standard-table`` table; entries whose
    text is empty or contains ``*`` are skipped. SVG attribute names are
    taken from the link text in the lists that follow the
    ``SVG_Attributes`` heading. Names are stripped of surrounding
    whitespace; duplicates are not removed here.

    A page whose structure differs from what is expected is not skipped
    silently: the lookups below fail with an exception (presumably
    ``IndexError`` / ``AttributeError`` - confirm against lxml) so that a
    layout change is noticed.

    :param html_path: saved copy of
        https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes
    :param svg_path: saved copy of
        https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute
    """
    # HTML Attributes from
    # https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes
    # Read inside a with-block so the handle is closed before any parsing.
    with open(html_path, 'rb') as f:
        raw = f.read()
    root = html.fromstring(raw)
    table = root.xpath('//table[@class="standard-table"]/tbody')[0]
    for tr in table.findall('tr'):
        td = tr.find('td')
        code = td.find('code')
        attr = code.text
        if attr and '*' not in attr:
            yield attr.strip()

    # SVG Attributes from
    # https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute
    with open(svg_path, 'rb') as f:
        raw = f.read()
    root = html.fromstring(raw)
    h2 = root.xpath('//h2[@id="SVG_Attributes"]')[0]
    for ul in h2.xpath('following-sibling::div[1]/ul'):
        for attr in ul.xpath('./li/code/a/text()'):
            yield attr.strip()
def main():
    """Regenerate all attribute headers from the scraped attribute names.

    Changes into the directory containing this script, collects the unique
    attribute names (plus ``data-reactid``), sorts them, and feeds the sorted
    list to both generators.
    """
    script_dir = os.path.dirname(self_path)
    os.chdir(script_dir)
    unique_names = set(get_attr_names())
    unique_names.add('data-reactid')
    attrs = sorted(unique_names)
    generate_attr_headers(attrs)
    generate_attr_perfect_hash(attrs)


if __name__ == '__main__':
    main()