Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix] Handle importing UTF-16 encoded CSV files #568

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
5cddf82
Fixes: Handle importing UTF-16 encoded CSV files (#550)
akhilsharmaa Nov 22, 2024
e02605d
Added unit testing for "get_encoding_format()" function testing for …
akhilsharmaa Nov 25, 2024
0dc7459
Merge branch 'master' into Issues/550-failure-on-importing-utf-16-files
nemesifier Nov 26, 2024
bf3947c
Add unit test, reproducing the bug#550 in unit testing, added files (…
akhilsharmaa Nov 27, 2024
01fe5c4
Refactored code (defined a seperate function for the assertion), sepe…
akhilsharmaa Nov 27, 2024
a45fd96
Improved logic of the `get_encoding_format` function.
akhilsharmaa Nov 28, 2024
1d7a7b3
removed unwanted comments
akhilsharmaa Nov 28, 2024
19f0580
Merge branch 'master' into Issues/550-failure-on-importing-utf-16-files
akhilsharmaa Nov 29, 2024
6fa896f
[change] Refactored code by running openwisp-qa-format
akhilsharmaa Dec 4, 2024
e341735
[changes] refactored using `black==23.12.1"
akhilsharmaa Dec 4, 2024
ee0b2cf
[changes] Removed use of chardet because it was unstable on utf-8 blo…
akhilsharmaa Dec 5, 2024
8f892c0
Merge branch 'master' into Issues/550-failure-on-importing-utf-16-files
akhilsharmaa Dec 5, 2024
f26e44b
Merge branch 'master' into Issues/550-failure-on-importing-utf-16-files
nemesifier Dec 5, 2024
1fa2894
[changes] Removed unused dependency chardet
akhilsharmaa Jan 27, 2025
7b0b9d0
Merge branch 'master' into Issues/550-failure-on-importing-utf-16-files
nemesifier Jan 27, 2025
4e5f0a7
Merge branch 'master' into Issues/550-failure-on-importing-utf-16-files
akhilsharmaa Jan 29, 2025
ae29c39
[tests] Added one more utf16 encoded test file
nemesifier Jan 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion openwisp_radius/base/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
)
from ..utils import (
SmsMessage,
decode_byte_data,
find_available_username,
generate_sms_token,
get_sms_default_valid_until,
Expand Down Expand Up @@ -951,7 +952,7 @@ def csvfile_upload(
if not csvfile:
csvfile = self.csvfile
csv_data = csvfile.read()
csv_data = csv_data.decode('utf-8') if isinstance(csv_data, bytes) else csv_data
csv_data = decode_byte_data(csv_data)
reader = csv.reader(StringIO(csv_data), delimiter=',')
self.full_clean()
self.save()
Expand Down
1 change: 1 addition & 0 deletions openwisp_radius/receivers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Receiver functions for django signals (eg: post_save)
"""

import logging

from celery.exceptions import OperationalError
Expand Down
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions openwisp_radius/tests/static/test_batch_utf8Sig_file2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
44D1FADD7379,cleartext$D0weL6L8,[email protected],EAPUSER1,USER1
13 changes: 13 additions & 0 deletions openwisp_radius/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,19 @@ def test_validate_file_format(self):
in error.exception.message
)

def test_validate_utf16_file_format(self):
utf_16_file_1_format_path = self._get_path('static/test_batch_utf16_file1.csv')
akhilsharmaa marked this conversation as resolved.
Show resolved Hide resolved
assert validate_csvfile(open(utf_16_file_1_format_path, 'rb')) is None

utf_16_file_2_format_path = self._get_path('static/test_batch_utf16_file2.csv')
assert validate_csvfile(open(utf_16_file_2_format_path, 'rb')) is None

def test_validate_utf8Sig_file_format(self):
utf_16_file_2_format_path = self._get_path(
'static/test_batch_utf8Sig_file2.csv'
)
assert validate_csvfile(open(utf_16_file_2_format_path, 'rb')) is None

def test_validate_csvfile(self):
invalid_csv_path = self._get_path('static/test_batch_invalid.csv')
improper_csv_path = self._get_path('static/test_batch_improper.csv')
Expand Down
25 changes: 24 additions & 1 deletion openwisp_radius/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,17 +129,40 @@ def find_available_username(username, users_list, prefix=False):
return tmp


def get_encoding_format(byte_data):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason to implement this function instead of using the chardet library?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, because chardet was not detecting the utf-8-sig format encodings.

# Explicitly handle some common encodings, including utf-16le
common_encodings = ['utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le', 'ascii']

for enc in common_encodings:
try:
byte_data.decode(enc)
return enc
except (UnicodeDecodeError, TypeError):
continue

return 'utf-8'


def decode_byte_data(data):
if isinstance(data, bytes):
data = data.decode(get_encoding_format(data))
data = data.replace('\x00', '') # Removing null bytes
return data


def validate_csvfile(csvfile):
csv_data = csvfile.read()

try:
csv_data = csv_data.decode('utf-8') if isinstance(csv_data, bytes) else csv_data
csv_data = decode_byte_data(csv_data)
except UnicodeDecodeError:
raise ValidationError(
_(
'Unrecognized file format, the supplied file '
'does not look like a CSV file.'
)
)

reader = csv.reader(StringIO(csv_data), delimiter=',')
error_message = 'The CSV contains a line with invalid data,\
line number {} triggered the following error: {}'
Expand Down