Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Add fallback to TextDecoder and TextEncoder #4324

Merged
merged 3 commits into from
Jul 11, 2022
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 55 additions & 14 deletions lib/util/string_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,38 @@ shaka.util.StringUtils = class {
if (uint8[0] == 0xef && uint8[1] == 0xbb && uint8[2] == 0xbf) {
uint8 = uint8.subarray(3);
}

// Use the TextDecoder interface to decode the text. This has the advantage
// compared to the previously-standard decodeURIComponent that it will
// continue parsing even if it finds an invalid UTF8 character, rather than
// stop and throw an error.
const utf8decoder = new TextDecoder();
const decoded = utf8decoder.decode(uint8);
if (decoded.includes('\uFFFD')) {
shaka.log.alwaysError('Decoded string contains an "unknown character" ' +
'codepoint. That probably means the UTF8 ' +
'encoding was incorrect!');
if (window.TextDecoder) {
// Use the TextDecoder interface to decode the text. This has the
// advantage compared to the previously-standard decodeURIComponent that
// it will continue parsing even if it finds an invalid UTF8 character,
// rather than stop and throw an error.
const utf8decoder = new TextDecoder();
const decoded = utf8decoder.decode(uint8);
if (decoded.includes('\uFFFD')) {
shaka.log.alwaysError('Decoded string contains an "unknown character' +
'" codepoint. That probably means the UTF8 ' +
'encoding was incorrect!');
}
return decoded;
} else {
// http://stackoverflow.com/a/13691499
const utf8 = shaka.util.StringUtils.fromCharCode(uint8);
// This escapes the string one character at a time. Letters, digits and a
// few punctuation characters are left as-is; every other byte is converted
// to a %XX escape sequence.
// Example: '\x67\x35\xe3\x82\xac' -> 'g5%E3%82%AC'
const escaped = escape(utf8);
// Decode the escaped sequence. This will interpret UTF-8 sequences into
// the correct character.
// Example: 'g5%E3%82%AC' -> 'g5ガ'
try {
return decodeURIComponent(escaped);
} catch (e) {
throw new shaka.util.Error(
shaka.util.Error.Severity.CRITICAL, shaka.util.Error.Category.TEXT,
shaka.util.Error.Code.BAD_ENCODING);
}
}
return decoded;
}


Expand Down Expand Up @@ -141,8 +160,30 @@ shaka.util.StringUtils = class {
* @export
*/
static toUTF8(str) {
const utf8Encoder = new TextEncoder();
return shaka.util.BufferUtils.toArrayBuffer(utf8Encoder.encode(str));
if (window.TextEncoder) {
const utf8Encoder = new TextEncoder();
return shaka.util.BufferUtils.toArrayBuffer(utf8Encoder.encode(str));
} else {
// http://stackoverflow.com/a/13691499
// Converts the given string to a URI encoded string. Unreserved ASCII
// characters (letters, digits and a few marks) are not converted; any
// other character is converted to a series of URI escape sequences
// according to UTF-8.
// Example: 'g5ガ' -> 'g5%E3%82%AC'
const encoded = encodeURIComponent(str);
// Convert each escape sequence individually into a character. Each
// escape sequence is interpreted as a code-point, so if an escape
// sequence happens to be part of a multi-byte sequence, each byte will
// be converted to a single character.
// Example: 'g5%E3%82%AC' -> '\x67\x35\xe3\x82\xac'
const utf8 = unescape(encoded);

const result = new Uint8Array(utf8.length);
for (let i = 0; i < utf8.length; i++) {
const item = utf8[i];
result[i] = item.charCodeAt(0);
}
return shaka.util.BufferUtils.toArrayBuffer(result);
}
}


Expand Down