-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTnDScreen.py
59 lines (48 loc) · 1.37 KB
/
TnDScreen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import sys
import re
import codecs
class Screen:
    """Raw bytes of a screen file, split into lines on UTF-16-LE CRLF.

    After construction, ``scr`` holds the list of byte-string lines in
    file order (the separators themselves are removed).
    """

    def __init__(self, file_name):
        """Read *file_name* in binary mode and split it into ``self.scr``.

        Raises whatever ``open``/``read`` raise if the file is unreadable.
        """
        with open(file_name, 'rb') as fn:
            data = fn.read()
        # The separator is "\r\n" encoded as UTF-16-LE. It must be a bytes
        # literal: the file is read in binary mode, and on Python 3
        # splitting bytes with a text separator raises TypeError.
        self.scr = data.split(b'\r\x00\n\x00')

    def findall(self, pattern):
        """Locate every match of regex *pattern* in every line.

        Returns a list of ``(line_index, [match_start, ...])`` tuples, one
        per line that has at least one match; lines without matches are
        omitted. *pattern* must be of the same string type as the lines.
        """
        # Compile once instead of resolving the pattern for each line.
        regex = re.compile(pattern)
        ixs = []
        for i, line in enumerate(self.scr):
            ix = [m.start() for m in regex.finditer(line)]
            if ix:
                ixs.append((i, ix))
        return ixs

    def splitall(self, pattern):
        """Split every line on the literal separator *pattern*.

        Returns one list of pieces per line, in line order. Note that this
        is a plain ``split`` on each line, not a regex split.
        """
        return [line.split(pattern) for line in self.scr]
# DC1 (\x13) seems to break scr[1] between unicode data and header.
# NOTE(review): in ASCII, \x13 is DC3 and DC1 is \x11 -- confirm which
# byte was actually observed.
# Seeing a lot of \xfe\xff\xff around.
# \xfe\xff is the UTF-16 BOM (U+FEFF) in big-endian byte order; the
# little-endian form would be \xff\xfe.
#
# Text is encoded utf-16-le (little endian).
# We have some illegal characters coming up with \xDE. Code points
# U+D800 to U+DFFF are reserved for encoding lead and trail
# surrogates. Might be able to isolate these areas by splitting the
# sections up using the BOMs scattered throughout.
def findtot(findres):
    """Return the total number of hits in a find result.

    *findres* is an iterable of entries whose element at index 1 is a
    sized collection (for example the ``(line_index, [starts])`` tuples
    built by ``Screen.findall``). The lengths of those collections are
    summed; an empty *findres* yields 0.
    """
    # Index with [1] rather than unpacking so entries longer than two
    # elements are still accepted.
    return sum(len(entry[1]) for entry in findres)
def get_types(splitline):
    """Pair each piece's two-byte prefix with its decoded remainder.

    For every item in *splitline*, the first two elements are kept as-is
    (presumably a type marker -- confirm against the file format) and the
    rest is decoded as UTF-16-LE. If decoding is not possible, the raw
    remainder is kept instead, so the function never fails on bad text.

    Returns a list of ``(prefix, value)`` tuples in input order.
    """
    new = []
    for p in splitline:
        t = p[:2]
        payload = p[2:]
        try:
            # ``.decode`` exists on byte strings in both Python 2 and 3;
            # the ``unicode`` builtin is Python 2 only, and its NameError
            # on Python 3 used to be swallowed, so nothing was decoded.
            v = payload.decode('utf-16-le')
        except (UnicodeError, AttributeError):
            # UnicodeError: truncated data or illegal surrogates.
            # AttributeError: payload is not a byte string; pass it through.
            v = payload
        new.append((t, v))
    return new