shaka-project · joeyparrish · Jul 11, 2022 · Jun 29, 2022 · Jul 1, 2022 · Jul 1, 2022
diff --git a/lib/util/string_utils.js b/lib/util/string_utils.js
@@ -36,19 +36,38 @@ shaka.util.StringUtils = class {
     if (uint8[0] == 0xef && uint8[1] == 0xbb && uint8[2] == 0xbf) {
       uint8 = uint8.subarray(3);
     }
-
-    // Use the TextDecoder interface to decode the text.  This has the advantage
-    // compared to the previously-standard decodeUriComponent that it will
-    // continue parsing even if it finds an invalid UTF8 character, rather than
-    // stop and throw an error.
-    const utf8decoder = new TextDecoder();
-    const decoded = utf8decoder.decode(uint8);
-    if (decoded.includes('\uFFFD')) {
-      shaka.log.alwaysError('Decoded string contains an "unknown character" ' +
-                            'codepoint.  That probably means the UTF8 ' +
-                            'encoding was incorrect!');
+    if (window.TextDecoder) {
+      // Use the TextDecoder interface to decode the text.  This has the
+      // advantage compared to the previously-standard decodeUriComponent that
+      // it will continue parsing even if it finds an invalid UTF8 character,
+      // rather than stop and throw an error.
+      const utf8decoder = new TextDecoder();
+      const decoded = utf8decoder.decode(uint8);
+      if (decoded.includes('\uFFFD')) {
+        shaka.log.alwaysError('Decoded string contains an "unknown character' +
+                              '" codepoint.  That probably means the UTF8 ' +
+                              'encoding was incorrect!');
+      }
+      return decoded;
+    } else {
+      // http://stackoverflow.com/a/13691499
+      const utf8 = shaka.util.StringUtils.fromCharCode(uint8);
+      // This converts each character in the string to an escape sequence.
+      // ASCII letters, digits, and @*_+-./ are not converted; any other
+      // character is converted to a URI escape sequence.
+      // Example: '\x67\x23\xe2\x82\xac' -> 'g%23%E2%82%AC'
+      const escaped = escape(utf8);
+      // Decode the escaped sequence.  This will interpret UTF-8 sequences into
+      // the correct character.
+      // Example: 'g%23%E2%82%AC' -> 'g#€'
+      try {
+        return decodeURIComponent(escaped);
+      } catch (e) {
+        throw new shaka.util.Error(
+            shaka.util.Error.Severity.CRITICAL, shaka.util.Error.Category.TEXT,
+            shaka.util.Error.Code.BAD_ENCODING);
+      }
     }
-    return decoded;
   }
 
 
@@ -141,8 +160,30 @@ shaka.util.StringUtils = class {
    * @export
    */
   static toUTF8(str) {
-    const utf8Encoder = new TextEncoder();
-    return shaka.util.BufferUtils.toArrayBuffer(utf8Encoder.encode(str));
+    if (window.TextEncoder) {
+      const utf8Encoder = new TextEncoder();
+      return shaka.util.BufferUtils.toArrayBuffer(utf8Encoder.encode(str));
+    } else {
+      // http://stackoverflow.com/a/13691499
+      // Converts the given string to a URI encoded string.  Unreserved ASCII
+      // characters are left as-is; every other character is converted to a
+      // series of URI escape sequences according to UTF-8.
+      // Example: 'g#€' -> 'g%23%E2%82%AC'
+      const encoded = encodeURIComponent(str);
+      // Convert each escape sequence individually into a character.  Each
+      // escape sequence is interpreted as a code-point, so if an escape
+      // sequence happens to be part of a multi-byte sequence, each byte will
+      // be converted to a single character.
+      // Example: 'g%23%E2%82%AC' -> '\x67\x23\xe2\x82\xac'
+      const utf8 = unescape(encoded);
+      // Each character is now below 0x100, so its code is one UTF-8 byte.
+      const result = new Uint8Array(utf8.length);
+      for (let i = 0; i < utf8.length; i++) {
+        const item = utf8[i];
+        result[i] = item.charCodeAt(0);
+      }
+      return shaka.util.BufferUtils.toArrayBuffer(result);
+    }
   }