-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmutf7.py
124 lines (96 loc) · 3.11 KB
/
mutf7.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# -*- coding: utf-8 -*-
"""
This awesome piece of code is intended to encode and decode modified UTF-7
(the one that is used for IMAP folder names)
See https://datatracker.ietf.org/doc/html/rfc2060.html#section-5.1.3
encode_mutf7(text) - to encode
decode_mutf7(text) - to decode
"""
__author__ = "https://github.com/cheshire-mouse"
__license__ = "WTFPL v. 2"
import base64
import re
class InvalidUTF7FormatException(Exception):
    """Signals that a string is not well-formed modified UTF-7.

    The human-readable reason is available both as ``str(exc)`` and as the
    ``message`` attribute.
    """

    def __init__(self, message):
        # Let the base class record the positional argument, then expose it
        # under the attribute name this module has always used.
        super(InvalidUTF7FormatException, self).__init__(message)
        self.message = message
# Code points of printable US-ASCII (space 0x20 through tilde 0x7E).  Only
# these characters are copied verbatim into a modified UTF-7 string; every
# other character goes through the modified BASE64 path.
ascii_codes = set(range(0x20, 0x7F))


def __leading_run_length(text, printable):
    """Return the length of the leading run of ``text`` whose characters are
    all printable ASCII (``printable=True``) or all NOT printable ASCII
    (``printable=False``).
    """
    for pos, char in enumerate(text):
        if (ord(char) in ascii_codes) != printable:
            return pos
    return len(text)


def __get_ascii(text):
    """Return the leading printable-ASCII run of ``text``.

    The run is returned as ``str`` (not ``bytes``) so that it can be
    concatenated with the rest of the encoded output on Python 3.
    """
    return text[:__leading_run_length(text, True)]


def __remove_ascii(text):
    """Return ``text`` with its leading printable-ASCII run stripped."""
    return text[__leading_run_length(text, True):]


def __get_nonascii(text):
    """Return the leading run of ``text`` that is not printable ASCII."""
    return text[:__leading_run_length(text, False)]


def __remove_nonascii(text):
    """Return ``text`` with its leading non-printable-ASCII run stripped."""
    return text[__leading_run_length(text, False):]


def __encode_modified_utf7(text):
    """Encode ``text`` as one shift sequence: ``&<modified BASE64>-``.

    Modified BASE64 is ordinary BASE64 over the UTF-16BE bytes of ``text``,
    without the ``=`` padding and with ``,`` in place of ``/``.
    """
    # b64encode() returns bytes; convert to str before the str-based
    # clean-up below (calling bytes.rstrip("=") is a TypeError on Python 3).
    b64 = base64.b64encode(text.encode("utf-16be")).decode("ascii")
    return "&" + b64.rstrip("=").replace("/", ",") + "-"


def encode_mutf7(text):
    """Encode the text in modified utf-7 (as used for IMAP folder names).

    Printable ASCII characters are copied as-is, except that ``&`` becomes
    ``&-``.  Every maximal run of other characters is emitted as one
    ``&...-`` shift sequence.

    :param text: the unicode string to encode
    :return: the encoded string, containing printable ASCII only
    """
    pieces = []
    # Escape literal ampersands first: "&" is ASCII, so the "&-" pairs pass
    # through the loop below untouched.
    text = text.replace("&", "&-")
    while text:
        pieces.append(__get_ascii(text))
        text = __remove_ascii(text)
        if text:
            pieces.append(__encode_modified_utf7(__get_nonascii(text)))
            text = __remove_nonascii(text)
    return "".join(pieces)
# One shift sequence: "&", an optional modified-BASE64 payload, then "-".
# The bare pair "&-" (empty payload) stands for a literal ampersand.
_SHIFT_SEQUENCE = re.compile(r"&[^&-]*-")


def __check_utf7_format(text):
    """Validate that ``text`` is well-formed modified UTF-7.

    :param text: the candidate encoded string
    :raises InvalidUTF7FormatException: if ``text`` contains a character
        outside printable ASCII, an ``&`` that is not closed by ``-``, a
        character inside a shift sequence that is not in the modified BASE64
        alphabet, or two shift sequences directly after one another.
    """
    # re.search with unanchored patterns: "." does not match newlines, so
    # the former "^.*X.*$" form could miss an invalid character when the
    # text contained more than one line break.
    if re.search(r"[^\040-\176]", text):
        raise InvalidUTF7FormatException("Invalid character for UTF7 encoding")
    if re.search(r"&[^-]*(?:&|$)", text):
        raise InvalidUTF7FormatException("BASE64 section is not closed")
    if re.search(r"&[A-Za-z0-9+,]*[^\-A-Za-z0-9+,]", text):
        raise InvalidUTF7FormatException("Invalid character for BASE64 encoding")
    if re.search(r"&[^-&]+-&[^-&]+-", text):
        raise InvalidUTF7FormatException("Null shifts are not permitted")


def __decode_modified_utf7(text):
    """Decode a single shift sequence (``&...-``) into unicode text.

    :param text: the sequence including its leading ``&`` and trailing ``-``
    :return: ``"&"`` for the escape ``&-``, otherwise the UTF-16BE decoded
        payload
    """
    if text == "&-":
        return "&"
    # Drop the "&" / "-" delimiters and undo the "," -> "/" substitution.
    payload = text[1:-1].replace(",", "/")
    # Modified BASE64 omits padding; restore it to a multiple of four.
    payload += "=" * (-len(payload) % 4)
    return base64.b64decode(payload).decode("utf-16be")


def decode_mutf7(text):
    """Decode the text in modified utf-7 (as used for IMAP folder names).

    :param text: the encoded string
    :return: the decoded unicode string
    :raises InvalidUTF7FormatException: if ``text`` fails format validation.
        Errors raised by ``base64.b64decode`` or by the UTF-16 decoding of a
        malformed payload are not translated and propagate unchanged.
    """
    __check_utf7_format(text)
    # Decode every shift sequence in ONE pass with a function replacement:
    #  * already-decoded output is never rescanned, so a literal "&" coming
    #    from "&-" cannot pair up with a later "-" in the plain text;
    #  * the decoded text is not interpreted as a regex replacement
    #    template, so backslashes in it are safe.
    return _SHIFT_SEQUENCE.sub(
        lambda match: __decode_modified_utf7(match.group(0)), text
    )