Skip to content

Commit

Permalink
Fix hmt encoding (#35)
Browse files Browse the repository at this point in the history
  • Loading branch information
ShellyHerself authored May 1, 2020
2 parents b18e984 + e497b72 commit 4a0a1e3
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 23 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.MD
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

This project also inherits changes from [Binilla](https://github.com/Sigmmma/binilla).

## [1.9.3]
### Changed
- Stop depending on charset_normalizer. Fixes an issue where the package could not be installed on Windows.

## [1.9.2]
### Changed
- Fix encoding problems with hmt files that aren't utf-16-le encoded.
Expand Down
4 changes: 2 additions & 2 deletions mozzarilla/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@
# ##############
__author__ = "Sigmmma"
# YYYY.MM.DD
__date__ = "2020.04.28"
__version__ = (1, 9, 2)
__date__ = "2020.05.01"
__version__ = (1, 9, 3)
__website__ = "https://github.com/Sigmmma/mozzarilla"
36 changes: 26 additions & 10 deletions mozzarilla/windows/tools/compile_hud_message_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#

import os
import charset_normalizer

from pathlib import Path
from traceback import format_exc
Expand All @@ -17,6 +16,30 @@
from supyr_struct.util import is_path_empty
from binilla.windows.filedialog import askopenfilename

def hacky_detect_encoding(fp):
    """Guess the text encoding of the file at ``fp``.

    The guess is made in this order:

    1. If the file starts with the UTF-16 little-endian byte order mark
       (``FF FE``), return ``"utf-16-le"``.
    2. If the file starts with the UTF-16 big-endian byte order mark
       (``FE FF``), return ``"utf-16-be"``.
    3. Otherwise, if any byte at an odd offset is a null byte, assume
       BOM-less UTF-16 and return ``"utf-16-le"``.
    4. Otherwise return ``"latin-1"``.

    Files shorter than two bytes (including empty files) cannot carry a
    BOM and fall through to steps 3 and 4. Only odd offsets are scanned,
    so BOM-less big-endian text is not recognised by step 3.

    Args:
        fp: Path (``str`` or ``pathlib.Path``) of the file to inspect.

    Returns:
        One of ``"utf-16-le"``, ``"utf-16-be"`` or ``"latin-1"``.
    """
    fp = Path(fp)
    with fp.open("rb") as f:
        head = f.read(2)

        # Compare the whole two-byte prefix at once: this never indexes
        # past the end of a short file, and keeps the two BOMs distinct.
        if head == b"\xff\xfe":
            return "utf-16-le"
        if head == b"\xfe\xff":
            return "utf-16-be"

        # No BOM, so the rest of the file is needed for the heuristic.
        data = head + f.read()

    # If we find a null byte while checking every other byte,
    # we assume utf-16 without BOM.
    if 0 in data[1::2]:
        return "utf-16-le"

    # If not, we default to latin-1.
    return "latin-1"

def hud_message_text_from_hmt(app, fp=None):
load_dir = app.last_data_load_dir
Expand Down Expand Up @@ -46,15 +69,8 @@ def hud_message_text_from_hmt(app, fp=None):
print("Creating hud_message_text from this hmt file:")
print(" %s" % fp)

with fp.open("rb") as f:
contents = f.read()
guess = charset_normalizer.detect(contents)
# utf-16 is our fallback as that is the proper encoding for
# these files and has the biggest detection failure rate.
hmt_string_data = contents.decode(guess['encoding'] or "utf-16")
# Reading files this way doesn't remove carriage returns.
# We have to wipe them out like this.
hmt_string_data = hmt_string_data.replace("\r", "")
with fp.open("r", encoding=hacky_detect_encoding(fp)) as f:
hmt_string_data = f.read()

except Exception:
print(format_exc())
Expand Down
12 changes: 4 additions & 8 deletions mozzarilla/windows/tools/compile_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from supyr_struct.util import is_path_empty
from binilla.windows.filedialog import askopenfilename

from mozzarilla.windows.tools.compile_hud_message_text import hacky_detect_encoding


def strings_from_txt(app, fp=None):
load_dir = app.last_data_load_dir
Expand Down Expand Up @@ -45,15 +47,9 @@ def strings_from_txt(app, fp=None):
tag_ext = "unicode_string_list"
tag_cls = "ustr"

with fp.open("rb") as f:
data = f.read(2)
encoding = hacky_detect_encoding(fp)

if data[0] == 255 and data[1] == 254:
encoding = "utf-16-le"
elif data[1] == 254 and data[0] == 255:
encoding = "utf-16-be"
else:
encoding = "latin-1"
if encoding == "latin-1":
tag_ext = "string_list"
tag_cls = "str#"

Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,3 @@ supyr_struct
binilla
reclaimer
arbytmap
charset_normalizer
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@
},
platforms=["POSIX", "Windows"],
keywords=["binilla", "binary", "data structure"],
install_requires=['reclaimer', 'binilla', 'arbytmap', 'supyr_struct', "charset_normalizer"],
requires=['reclaimer', 'arbytmap', 'binilla', "charset_normalizer"],
install_requires=['reclaimer', 'binilla', 'arbytmap', 'supyr_struct'],
requires=['reclaimer', 'arbytmap', 'binilla'],
provides=['mozzarilla'],
python_requires=">=3.5",
classifiers=[
Expand Down

0 comments on commit 4a0a1e3

Please sign in to comment.