diff --git a/src/core/segment_sinks/implementations/text/text_segment_sink.ts b/src/core/segment_sinks/implementations/text/text_segment_sink.ts index 39b9366f9f..eb83277aa5 100644 --- a/src/core/segment_sinks/implementations/text/text_segment_sink.ts +++ b/src/core/segment_sinks/implementations/text/text_segment_sink.ts @@ -1,6 +1,7 @@ import log from "../../../../log"; import type { ITextDisplayer } from "../../../../main_thread/types"; import type { ITextTrackSegmentData } from "../../../../transports"; +import isNullOrUndefined from "../../../../utils/is_null_or_undefined"; import getMonotonicTimeStamp from "../../../../utils/monotonic_timestamp"; import type { IRange } from "../../../../utils/ranges"; import type { ICompleteSegmentInfo, IPushChunkInfos, ISBOperation } from "../types"; @@ -142,9 +143,9 @@ export default class TextSegmentSink extends SegmentSink { } /** Data of chunks that should be pushed to the HTMLTextSegmentSink. */ -export interface ITextTracksBufferSegmentData { +export interface ITextTracksBufferSegmentData { /** The text track data, in the format indicated in `type`. */ - data: string; + data: T; /** The format of `data` (examples: "ttml", "srt" or "vtt") */ type: string; /** @@ -153,6 +154,11 @@ export interface ITextTracksBufferSegmentData { * be parsed. */ language?: string | undefined; + /** + * Optional timescale data context that is used to convert timing information + * into seconds. + */ + timescale: number | null; /** start time from which the segment apply, in seconds. */ start?: number | undefined; /** end time until which the segment apply, in seconds. */ @@ -167,7 +173,7 @@ export interface ITextTracksBufferSegmentData { */ function assertChunkIsTextTrackSegmentData( chunk: unknown, -): asserts chunk is ITextTracksBufferSegmentData { +): asserts chunk is ITextTracksBufferSegmentData { if ( (__ENVIRONMENT__.CURRENT_ENV as number) === (__ENVIRONMENT__.PRODUCTION as number) ) { @@ -176,14 +182,30 @@ function assertChunkIsTextTrackSegmentData( if ( typeof chunk !== "object" || chunk === null || - typeof (chunk as ITextTracksBufferSegmentData).data !== "string" || - typeof (chunk as ITextTracksBufferSegmentData).type !== "string" || - ((chunk as ITextTracksBufferSegmentData).language !== undefined && - typeof (chunk as ITextTracksBufferSegmentData).language !== "string") || - ((chunk as ITextTracksBufferSegmentData).start !== undefined && - typeof (chunk as ITextTracksBufferSegmentData).start !== "number") || - ((chunk as ITextTracksBufferSegmentData).end !== undefined && - typeof (chunk as ITextTracksBufferSegmentData).end !== "number") + isNullOrUndefined((chunk as ITextTracksBufferSegmentData).data) + ) { + throw new Error("Invalid format given to a TextSegmentSink"); + } + if ( + typeof (chunk as ITextTracksBufferSegmentData).type !== + "string" || + ((chunk as ITextTracksBufferSegmentData).language !== + undefined && + typeof (chunk as ITextTracksBufferSegmentData).language !== + "string") || + ((chunk as ITextTracksBufferSegmentData).start !== undefined && + typeof (chunk as ITextTracksBufferSegmentData).start !== + "number") || + ((chunk as ITextTracksBufferSegmentData).end !== undefined && + typeof (chunk as ITextTracksBufferSegmentData).end !== + "number") + ) { + throw new Error("Invalid format given to a TextSegmentSink"); + } + if ( + typeof (chunk as ITextTracksBufferSegmentData).data !== "string" && + typeof (chunk as ITextTracksBufferSegmentData).data.byteLength !== + "number" ) { throw new Error("Invalid format given to a 
TextSegmentSink"); } @@ -229,8 +251,10 @@ export interface ITextDisplayerInterface { */ if ((__ENVIRONMENT__.CURRENT_ENV as number) === (__ENVIRONMENT__.DEV as number)) { // @ts-expect-error: unused function for type checking - function _checkType(input: ITextTrackSegmentData): void { - function checkEqual(_arg: ITextTracksBufferSegmentData): void { + function _checkType( + input: ITextTrackSegmentData, + ): void { + function checkEqual(_arg: ITextTracksBufferSegmentData): void { /* nothing */ } checkEqual(input); diff --git a/src/main_thread/text_displayer/html/html_parsers.ts b/src/main_thread/text_displayer/html/html_parsers.ts index 1d77d69b4a..13ca4de0bd 100644 --- a/src/main_thread/text_displayer/html/html_parsers.ts +++ b/src/main_thread/text_displayer/html/html_parsers.ts @@ -11,6 +11,8 @@ export interface IHTMLCue { * Convert text track data into timed HTML Cues. * @param {string} type - Text track format wanted * @param {string} data - Text track data + * @param {Number} timescale - Potential external timescale to convert timing + * information into seconds. * @param {Number} timestampOffset - offset to apply to every timed text * @param {string} [language] - language of the text tracks * @returns {Array.} @@ -18,7 +20,8 @@ export interface IHTMLCue { */ export default function parseTextTrackToElements( type: string, - data: string, + data: string | BufferSource, + timescale: number, timestampOffset: number, language?: string, ): IHTMLCue[] { @@ -29,7 +32,7 @@ export default function parseTextTrackToElements( throw new Error("no parser found for the given text track"); } log.debug("HTSB: Parser found, parsing..."); - const parsed = parser(data, timestampOffset, language); + const parsed = parser(data, timescale, timestampOffset, language); log.debug("HTTB: Parsed successfully!", parsed.length); return parsed; } diff --git a/src/main_thread/text_displayer/html/html_text_displayer.ts b/src/main_thread/text_displayer/html/html_text_displayer.ts index 91b1b2858f..dfc8c10e4a 100644 --- a/src/main_thread/text_displayer/html/html_text_displayer.ts +++ b/src/main_thread/text_displayer/html/html_text_displayer.ts @@ -126,12 +126,25 @@ export default class HTMLTextDisplayer implements ITextDisplayer { return convertToRanges(this._buffered); } - const { start: startTime, end: endTime, data: dataString, type, language } = chunk; + const { + start: startTime, + end: endTime, + data: dataRaw, + type, + language, + timescale, + } = chunk; const appendWindowStart = appendWindow[0] ?? 0; const appendWindowEnd = appendWindow[1] ?? Infinity; - const cues = parseTextTrackToElements(type, dataString, timestampOffset, language); + const cues = parseTextTrackToElements( + type, + dataRaw, + timescale ?? 1, + timestampOffset, + language, + ); if (appendWindowStart !== 0 && appendWindowEnd !== Infinity) { // Removing before window start @@ -398,7 +411,7 @@ export default class HTMLTextDisplayer implements ITextDisplayer { /** Data of chunks that should be pushed to the `HTMLTextDisplayer`. */ export interface ITextTracksBufferSegmentData { /** The text track data, in the format indicated in `type`. 
*/ - data: string; + data: string | BufferSource; /** The format of `data` (examples: "ttml", "srt" or "vtt") */ type: string; /** @@ -424,7 +437,9 @@ export interface ITextTracksBufferSegmentData { */ if ((__ENVIRONMENT__.CURRENT_ENV as number) === (__ENVIRONMENT__.DEV as number)) { // @ts-expect-error: uncalled function just for type-checking - function _checkType(input: ITextTrackSegmentData): void { + function _checkType( + input: ITextTrackSegmentData, + ): void { function checkEqual(_arg: ITextTracksBufferSegmentData): void { /* nothing */ } diff --git a/src/main_thread/text_displayer/native/native_parsers.ts b/src/main_thread/text_displayer/native/native_parsers.ts index 9aec18dcc5..6fb16f6c9e 100644 --- a/src/main_thread/text_displayer/native/native_parsers.ts +++ b/src/main_thread/text_displayer/native/native_parsers.ts @@ -5,7 +5,9 @@ import log from "../../../log"; /** * Convert text track data into timed VTT Cues. * @param {string} type - Text track format wanted - * @param {string} data - Text track data + * @param {string|BufferSource} data - Text track data + * @param {Number} timescale - Potential external timescale to convert timing + * information into seconds. * @param {Number} timestampOffset - offset to apply to every timed text * @param {string} [language] - language of the text tracks * @returns {Array.} @@ -13,7 +15,8 @@ import log from "../../../log"; */ export default function parseTextTrackToCues( type: string, - data: string, + data: string | BufferSource, + timescale: number, timestampOffset: number, language?: string, ): Array { @@ -25,7 +28,7 @@ export default function parseTextTrackToCues( } log.debug("NTSB: Parser found, parsing..."); - const parsed = parser(data, timestampOffset, language); + const parsed = parser(data, timescale, timestampOffset, language); log.debug("NTSB: Parsed successfully!", parsed.length); return parsed; } diff --git a/src/main_thread/text_displayer/native/native_text_displayer.ts b/src/main_thread/text_displayer/native/native_text_displayer.ts index b768f62cdf..147f75e8b7 100644 --- a/src/main_thread/text_displayer/native/native_text_displayer.ts +++ b/src/main_thread/text_displayer/native/native_text_displayer.ts @@ -48,10 +48,23 @@ export default class NativeTextDisplayer implements ITextDisplayer { return convertToRanges(this._buffered); } const { timestampOffset, appendWindow, chunk } = infos; - const { start: startTime, end: endTime, data: dataString, type, language } = chunk; + const { + start: startTime, + end: endTime, + data: dataString, + type, + language, + timescale, + } = chunk; const appendWindowStart = appendWindow[0] ?? 0; const appendWindowEnd = appendWindow[1] ?? Infinity; - const cues = parseTextTrackToCues(type, dataString, timestampOffset, language); + const cues = parseTextTrackToCues( + type, + dataString, + timescale ?? 1, + timestampOffset, + language, + ); if (appendWindowStart !== 0 && appendWindowEnd !== Infinity) { // Removing before window start @@ -222,7 +235,7 @@ export default class NativeTextDisplayer implements ITextDisplayer { /** Data of chunks that should be pushed to the NativeTextDisplayer. */ export interface INativeTextTracksBufferSegmentData { /** The text track data, in the format indicated in `type`. 
*/ - data: string; + data: string | BufferSource; /** The format of `data` (examples: "ttml", "srt" or "vtt") */ type: string; /** diff --git a/src/parsers/containers/isobmff/utils.ts b/src/parsers/containers/isobmff/utils.ts index 2f21752d45..ce7737c3ff 100644 --- a/src/parsers/containers/isobmff/utils.ts +++ b/src/parsers/containers/isobmff/utils.ts @@ -20,6 +20,7 @@ import { be2toi, be3toi, be4toi, + be4toiSigned, be8toi, concat, itobe4, @@ -233,6 +234,92 @@ function getDefaultDurationFromTFHDInTRAF(traf: Uint8Array): number | undefined return defaultDuration; } +interface ITrunSampleInfo { + duration: number; + compositionTimeOffset: number | undefined; + size: number | undefined; + flags: number | undefined; +} + +function getTrunSamples(buffer: Uint8Array): ITrunSampleInfo[] { + const trafs = getTRAFs(buffer); + const samples: ITrunSampleInfo[] = []; + for (const traf of trafs) { + const trun = getBoxContent(traf, 0x7472756e /* trun */); + if (trun === null) { + continue; + } + let cursor = 0; + const version = trun[cursor]; + cursor += 1; + if (version > 1) { + return []; + } + + const flags = be3toi(trun, cursor); + cursor += 3; + const hasSampleDuration = (flags & 0x000100) > 0; + + let defaultDuration: number | undefined = 0; + if (!hasSampleDuration) { + defaultDuration = getDefaultDurationFromTFHDInTRAF(traf); + if (defaultDuration === undefined) { + return []; + } + } + + const hasDataOffset = (flags & 0x000001) > 0; + const hasFirstSampleFlags = (flags & 0x000004) > 0; + const hasSampleSize = (flags & 0x000200) > 0; + const hasSampleFlags = (flags & 0x000400) > 0; + const hasSampleCompositionOffset = (flags & 0x000800) > 0; + + const sampleCounts = be4toi(trun, cursor); + cursor += 4; + + if (hasDataOffset) { + cursor += 4; + } + if (hasFirstSampleFlags) { + cursor += 4; + } + + let i = sampleCounts; + while (i-- > 0) { + let duration; + let size; + let sampleFlags; + let compositionTimeOffset; + if (hasSampleDuration) { + duration = be4toi(trun, cursor); + cursor += 4; + } else { + duration = defaultDuration; + } + if (hasSampleSize) { + size = be4toi(trun, cursor); + cursor += 4; + } + if (hasSampleFlags) { + sampleFlags = be4toi(trun, cursor); + cursor += 4; + } + if (hasSampleCompositionOffset) { + compositionTimeOffset = + version === 0 ? be4toi(trun, cursor) : be4toiSigned(trun, cursor); + cursor += 4; + } + samples.push({ + duration, + compositionTimeOffset, + size, + flags: sampleFlags, + }); + } + } + return samples; +} + /** * Calculate segment duration approximation by additioning the duration from * every samples in a trun ISOBMFF box. @@ -563,6 +650,7 @@ function getKeyIdFromInitSegment(segment: Uint8Array): Uint8Array | null { return keyId.every((b) => b === 0) ? null : keyId; } +export type { ITrunSampleInfo }; export { getKeyIdFromInitSegment, getMDHDTimescale, @@ -573,4 +661,5 @@ export { patchPssh, updateBoxLength, parseEmsgBoxes, + getTrunSamples, }; diff --git a/src/parsers/texttracks/sami/html.ts b/src/parsers/texttracks/sami/html.ts index c73ed84fbb..dbac926cfd 100644 --- a/src/parsers/texttracks/sami/html.ts +++ b/src/parsers/texttracks/sami/html.ts @@ -29,8 +29,10 @@ * It always should be imported through the `features` object. 
  */

+import bufferSourceToUint8 from "../../../utils/buffer_source_to_uint8";
 import isNonEmptyString from "../../../utils/is_non_empty_string";
 import isNullOrUndefined from "../../../utils/is_null_or_undefined";
+import { utf8ToStr } from "../../../utils/string_parsing";
 import type { IHTMLCue } from "../types";
 const HTML_ENTITIES = /&#([0-9]+);/g;
@@ -99,11 +101,25 @@ function decodeEntities(text: string): string {
  * The specification being quite clunky, this parser
  * may not work for every sami input.
  *
- * @param {string} smi
+ * @param {string|BufferSource} input
+ * @param {Number} _timescale
  * @param {Number} timeOffset
  * @param {string} lang
  */
-function parseSami(smi: string, timeOffset: number, lang?: string): IHTMLCue[] {
+function parseSami(
+  input: string | BufferSource,
+  _timescale: number,
+  timeOffset: number,
+  lang?: string,
+): IHTMLCue[] {
+  let smi: string;
+  if (typeof input !== "string") {
+    // Assume UTF-8
+    // TODO: detection?
+    smi = utf8ToStr(bufferSourceToUint8(input));
+  } else {
+    smi = input;
+  }
   const syncOpen = /<sync[ >]/gi;
   const syncClose = /<sync[ >]|<\/body>/gi;
diff --git a/src/parsers/texttracks/sami/native.ts b/src/parsers/texttracks/sami/native.ts
index 441007da30..bb1c850686 100644
--- a/src/parsers/texttracks/sami/native.ts
+++ b/src/parsers/texttracks/sami/native.ts
@@ -21,8 +21,10 @@
 import type { ICompatVTTCue } from "../../../compat/browser_compatibility_types";
 import makeVTTCue from "../../../compat/make_vtt_cue";
+import bufferSourceToUint8 from "../../../utils/buffer_source_to_uint8";
 import isNonEmptyString from "../../../utils/is_non_empty_string";
 import isNullOrUndefined from "../../../utils/is_null_or_undefined";
+import { utf8ToStr } from "../../../utils/string_parsing";
 const HTML_ENTITIES = /&#([0-9]+);/g;
 const BR = /<br>/gi;
@@ -104,16 +106,26 @@ function decodeEntities(text: string): string {
  * The specification being quite clunky, this parser
  * may not work for every sami input.
  *
- * @param {string} smi
+ * @param {string|BufferSource} input
+ * @param {Number} _timescale
  * @param {Number} timeOffset
  * @param {string} lang
  * @returns {Array.<VTTCue>}
  */
 function parseSami(
-  smi: string,
+  input: string | BufferSource,
+  _timescale: number,
   timeOffset: number,
   lang?: string,
 ): Array<ICompatVTTCue | TextTrackCue> {
+  let smi: string;
+  if (typeof input !== "string") {
+    // Assume UTF-8
+    // TODO: detection?
+    smi = utf8ToStr(bufferSourceToUint8(input));
+  } else {
+    smi = input;
+  }
   const syncOpen = /<sync[ >]/gi;
   const syncClose = /<sync[ >]|<\/body>/gi;
diff --git a/src/parsers/texttracks/srt/html.ts b/src/parsers/texttracks/srt/html.ts
index d123106b9b..43b0178cb1 100644
--- a/src/parsers/texttracks/srt/html.ts
+++ b/src/parsers/texttracks/srt/html.ts
@@ -24,6 +24,8 @@
 // Done for fun. Understand <b>, <i>, <u> and <font> type
 // of tags.

+import bufferSourceToUint8 from "../../../utils/buffer_source_to_uint8";
+import { utf8ToStr } from "../../../utils/string_parsing";
 import getCueBlocks from "./get_cue_blocks";
 import parseCueBlock from "./parse_cue";
@@ -34,14 +36,24 @@ export interface ISRTHTMLCue {
 }
 /**
- * @param {string} srtStr
+ * @param {string|BufferSource} input
+ * @param {Number} _timescale
  * @param {Number} timeOffset
  * @returns {Array.<Object>}
  */
 export default function parseSRTStringToHTML(
-  srtStr: string,
+  input: string | BufferSource,
+  _timescale: number,
   timeOffset: number,
 ): ISRTHTMLCue[] {
+  let srtStr: string;
+  if (typeof input !== "string") {
+    // Assume UTF-8
+    // TODO: detection?
+    srtStr = utf8ToStr(bufferSourceToUint8(input));
+  } else {
+    srtStr = input;
+  }
   // Even if srt only authorize CRLF, we will also take LF or CR as line
   // terminators for resilience
   const lines = srtStr.split(/\r\n|\n|\r/);
diff --git a/src/parsers/texttracks/srt/native.ts b/src/parsers/texttracks/srt/native.ts
index 80485059ff..b14f32e4e6 100644
--- a/src/parsers/texttracks/srt/native.ts
+++ b/src/parsers/texttracks/srt/native.ts
@@ -24,20 +24,32 @@
 import type { ICompatVTTCue } from "../../../compat/browser_compatibility_types";
 import makeVTTCue from "../../../compat/make_vtt_cue";
+import bufferSourceToUint8 from "../../../utils/buffer_source_to_uint8";
+import { utf8ToStr } from "../../../utils/string_parsing";
 import getCueBlocks from "./get_cue_blocks";
 import parseCueBlock from "./parse_cue";
 /**
  * Parse whole srt file into an array of cues, to be inserted in a video's
  * TrackElement.
- * @param {string} srtStr
+ * @param {string|BufferSource} input
+ * @param {Number} _timescale
  * @param {Number} timeOffset
  * @returns {Array.<VTTCue>}
  */
 export default function parseSRTStringToVTTCues(
-  srtStr: string,
+  input: string | BufferSource,
+  _timescale: number,
   timeOffset: number,
 ): Array<ICompatVTTCue | TextTrackCue> {
+  let srtStr: string;
+  if (typeof input !== "string") {
+    // Assume UTF-8
+    // TODO: detection?
+ srtStr = utf8ToStr(bufferSourceToUint8(input)); + } else { + srtStr = input; + } // Even if srt only authorize CRLF, we will also take LF or CR as line // terminators for resilience const lines = srtStr.split(/\r\n|\n|\r/); diff --git a/src/parsers/texttracks/ttml/html/__tests__/__global__/html_ttml_parser.test.ts b/src/parsers/texttracks/ttml/html/__tests__/__global__/html_ttml_parser.test.ts index 5716ea18e7..943d757080 100644 --- a/src/parsers/texttracks/ttml/html/__tests__/__global__/html_ttml_parser.test.ts +++ b/src/parsers/texttracks/ttml/html/__tests__/__global__/html_ttml_parser.test.ts @@ -79,7 +79,7 @@ const testingText = ` `; describe("Global TTML HTML parsing tests", () => { - const res = parseTTMLToDiv(testingText, 0); + const res = parseTTMLToDiv(testingText, 1, 0); it("should parse the right amount of cues at the right time", () => { expect(res).toHaveLength(11); expect(res[0].start).toEqual(0.76); diff --git a/src/parsers/texttracks/ttml/html/parse_ttml_to_div.ts b/src/parsers/texttracks/ttml/html/parse_ttml_to_div.ts index d6b87561e9..ff0dbe8c97 100644 --- a/src/parsers/texttracks/ttml/html/parse_ttml_to_div.ts +++ b/src/parsers/texttracks/ttml/html/parse_ttml_to_div.ts @@ -14,6 +14,8 @@ * limitations under the License. */ +import bufferSourceToUint8 from "../../../../utils/buffer_source_to_uint8"; +import { utf8ToStr } from "../../../../utils/string_parsing"; import parseTtml from "../parse_ttml"; import { applyDefaultTTMLStyle, @@ -36,10 +38,23 @@ import parseCue from "./parse_cue"; * TODO TTML parsing is still pretty heavy on the CPU. * Optimizations have been done, principally to avoid using too much XML APIs, * but we can still do better. - * @param {string} str + * @param {string|BufferSource} input + * @param {number} _timescale * @param {number} timeOffset */ -export default function parseTTMLToDiv(str: string, timeOffset: number): ITTMLHTMLCue[] { +export default function parseTTMLToDiv( + input: string | BufferSource, + _timescale: number, + timeOffset: number, +): ITTMLHTMLCue[] { + let str: string; + if (typeof input !== "string") { + // Assume UTF-8 + // TODO: detection? + str = utf8ToStr(bufferSourceToUint8(input)); + } else { + str = input; + } const parsedCues = parseTtml(str, timeOffset); const cues: ITTMLHTMLCue[] = []; for (const parsedCue of parsedCues) { diff --git a/src/parsers/texttracks/ttml/native/parse_ttml_to_vtt.ts b/src/parsers/texttracks/ttml/native/parse_ttml_to_vtt.ts index d2c11be02b..64f49d98b7 100644 --- a/src/parsers/texttracks/ttml/native/parse_ttml_to_vtt.ts +++ b/src/parsers/texttracks/ttml/native/parse_ttml_to_vtt.ts @@ -15,17 +15,30 @@ */ import type { ICompatVTTCue } from "../../../../compat/browser_compatibility_types"; +import bufferSourceToUint8 from "../../../../utils/buffer_source_to_uint8"; +import { utf8ToStr } from "../../../../utils/string_parsing"; import parseTtml from "../parse_ttml"; import parseCue from "./parse_cue"; /** - * @param str - * @param timeOffset + * @param {string|BufferSource} input + * @param {number} _timescale + * @param {number} timeOffset + * @returns {Array.} */ export default function parseTtmlToNative( - str: string, + input: string | BufferSource, + _timescale: number, timeOffset: number, ): Array { + let str: string; + if (typeof input !== "string") { + // Assume UTF-8 + // TODO: detection? 
+ str = utf8ToStr(bufferSourceToUint8(input)); + } else { + str = input; + } const parsedCues = parseTtml(str, timeOffset); const cues: Array = []; for (const parsedCue of parsedCues) { diff --git a/src/parsers/texttracks/types.ts b/src/parsers/texttracks/types.ts index 529e8ca32f..d13c900a04 100644 --- a/src/parsers/texttracks/types.ts +++ b/src/parsers/texttracks/types.ts @@ -25,14 +25,16 @@ export interface IHTMLCue { // Function to parse texttracks into native VTT cues export type INativeTextTracksParserFn = ( - texttrack: string, + texttrack: string | BufferSource, + timescale: number, timeOffset: number, language?: string, ) => Array; // Function to parse texttracks into HTML cues export type IHTMLTextTracksParserFn = ( - texttrack: string, + texttrack: string | BufferSource, + timescale: number, timeOffset: number, language?: string, ) => IHTMLCue[]; diff --git a/src/parsers/texttracks/webvtt/html/__tests__/parse_webvtt_to_div.test.ts b/src/parsers/texttracks/webvtt/html/__tests__/parse_webvtt_to_div.test.ts index f221f0d220..cfc77df075 100644 --- a/src/parsers/texttracks/webvtt/html/__tests__/parse_webvtt_to_div.test.ts +++ b/src/parsers/texttracks/webvtt/html/__tests__/parse_webvtt_to_div.test.ts @@ -9,13 +9,13 @@ describe("parsers - webvtt - parseWebVTT", () => { it("should throw if text is empty", async () => { const parseWebVTT = (await vi.importActual("../parse_webvtt_to_div")) .default as typeof IParseWebVTT; - expect(() => parseWebVTT("", 0)).toThrowError("Can't parse WebVTT: Invalid File."); + expect(() => parseWebVTT("", 1, 0)).toThrowError("Can't parse WebVTT: Invalid File."); }); it("should throw if file seems to be invalid", async () => { const parseWebVTT = (await vi.importActual("../parse_webvtt_to_div")) .default as typeof IParseWebVTT; - expect(() => parseWebVTT("WEBWTT\n", 0)).toThrowError( + expect(() => parseWebVTT("WEBWTT\n", 1, 0)).toThrowError( "Can't parse WebVTT: Invalid File.", ); }); @@ -73,7 +73,7 @@ describe("parsers - webvtt - parseWebVTT", () => { const parseWebVTT = (await vi.importActual("../parse_webvtt_to_div")) .default as typeof IParseWebVTT; - expect(parseWebVTT("WEBVTT\n", 0)).toEqual([ + expect(parseWebVTT("WEBVTT\n", 1, 0)).toEqual([ { element: document.createElement("div"), end: 100, @@ -140,7 +140,7 @@ describe("parsers - webvtt - parseWebVTT", () => { const parseWebVTT = (await vi.importActual("../parse_webvtt_to_div")) .default as typeof IParseWebVTT; - expect(parseWebVTT("WEBVTT\n", 0)).toEqual([]); + expect(parseWebVTT("WEBVTT\n", 1, 0)).toEqual([]); expect(spyGetFirstLineAfterHeader).toHaveBeenCalledTimes(1); expect(spyGetStyleBlock).toHaveBeenCalledTimes(1); expect(spyGetCueBlock).toHaveBeenCalledTimes(1); diff --git a/src/parsers/texttracks/webvtt/html/parse_webvtt_to_div.ts b/src/parsers/texttracks/webvtt/html/parse_webvtt_to_div.ts index f48fb82bb6..b6bd6c457c 100644 --- a/src/parsers/texttracks/webvtt/html/parse_webvtt_to_div.ts +++ b/src/parsers/texttracks/webvtt/html/parse_webvtt_to_div.ts @@ -14,6 +14,16 @@ * limitations under the License. 
*/ +import log from "../../../../log"; +import bufferSourceToUint8 from "../../../../utils/buffer_source_to_uint8"; +import { be4toi } from "../../../../utils/byte_parsing"; +import { strToUtf8, utf8ToStr } from "../../../../utils/string_parsing"; +import { + getBoxContent, + getMDAT, + getTrackFragmentDecodeTime, +} from "../../../containers/isobmff"; +import { getTrunSamples } from "../../../containers/isobmff/utils"; import getCueBlocks from "../get_cue_blocks"; import getStyleBlocks from "../get_style_blocks"; import parseCueBlock from "../parse_cue_block"; @@ -32,13 +42,27 @@ import toHTML from "./to_html"; * Specific style is parsed and applied to class element. * * @throws Error - Throws if the given WebVTT string is invalid. - * @param {string} text - The whole webvtt subtitles to parse + * @param {string | BufferSource} text - The whole webvtt subtitles to parse + * @param {Number} timescale * @param {Number} timeOffset - Offset to add to start and end times, in seconds * @return {Array.} */ -export default function parseWebVTT(text: string, timeOffset: number): IVTTHTMLCue[] { +export default function parseWebVTT( + text: string | BufferSource, + timescale: number, + timeOffset: number, +): IVTTHTMLCue[] { + let textStr: string; + if (typeof text !== "string") { + // Assume UTF-8 + // XXX TODO: + // textStr = utf8ToStr(bufferSourceToUint8(text)); + return parseWebVTTInMp4(text, timescale, timeOffset); + } else { + textStr = text; + } const newLineChar = /\r\n|\n|\r/g; // CRLF|LF|CR - const linified = text.split(newLineChar); + const linified = textStr.split(newLineChar); const cuesArray: IVTTHTMLCue[] = []; if (/^WEBVTT( |\t|\n|\r|$)/.exec(linified[0]) === null) { @@ -61,3 +85,169 @@ export default function parseWebVTT(text: string, timeOffset: number): IVTTHTMLC } return cuesArray; } + +function parseWebVTTInMp4( + segment: BufferSource | string, + timescale: number, + timeOffset: number, +): IVTTHTMLCue[] { + let buffer: Uint8Array; + if (typeof segment === "string") { + buffer = strToUtf8(segment); + } else { + buffer = bufferSourceToUint8(segment); + } + if (buffer.length === 0) { + return []; + } + + const cuesArray = []; + const trackDecodeTime = getTrackFragmentDecodeTime(buffer); + if (trackDecodeTime === undefined) { + return []; + } + const trunSamples = getTrunSamples(buffer); + const mdat = getMDAT(buffer); + if (mdat === null) { + return []; + } + let mdatOffset = 0; + let lastTime = trackDecodeTime; + /** @type {!shaka.util.DataViewReader} */ + // const reader = new shaka.util.DataViewReader( + // rawPayload, shaka.util.DataViewReader.Endianness.BIG_ENDIAN); + + for (const sample of trunSamples) { + const duration = sample.duration ?? 0; + const startTime = + sample.compositionTimeOffset !== undefined + ? lastTime + sample.compositionTimeOffset + : lastTime; + lastTime = startTime + duration; + + // Read samples until it adds up to the given size. + let totalSize = 0; + // No sample size == a single sample + while (totalSize < (sample.size ?? 0)) { + // Read the payload size. 
+ const payloadSize = be4toi(mdat, mdatOffset); + mdatOffset += 4; + totalSize += payloadSize; + + const currentBoxName = utf8ToStr(mdat.slice(mdatOffset, mdatOffset + 4)); + mdatOffset += 4; + + let currentBoxData: Uint8Array | null = null; + if (currentBoxName === "vttc") { + if (payloadSize > 8) { + currentBoxData = mdat.slice(mdatOffset, mdatOffset + (payloadSize - 8)); + mdatOffset += payloadSize - 8; + } + } else if (currentBoxName === "vtte") { + if (payloadSize > 8) { + mdatOffset += payloadSize - 8; + } + } else { + log.error("webvtt: encountered unknown fragmented vtt box: ", currentBoxName); + mdatOffset += Math.min(payloadSize - 8, 1); + } + + if (duration > 0) { + if (currentBoxData !== null) { + const cue = parseVttC( + currentBoxData, + timeOffset + startTime / timescale, + timeOffset + lastTime / timescale, + ); + if (cue !== null) { + cuesArray.push(cue); + } + } + } else { + log.error("webvtt: cue duration missing"); + } + // + // goog.asserts.assert( + // !sample.sampleSize || totalSize <= sample.sampleSize, + // 'The samples do not fit evenly into the sample sizes given in ' + + // 'the TRUN box!'); + // + // If no sampleSize was specified, it's assumed that this sample + // corresponds to only a single cue. + } + } + + // goog.asserts.assert( + // !reader.hasMoreData(), + // "MDAT which contain VTT cues and non-VTT data are not currently " + "supported!", + // ); + + return cuesArray; +} + +function parseVttC( + data: Uint8Array, + startTime: number, + endTime: number, +): IVTTHTMLCue | null { + const payload = getPayl(data); + // const iden = getIden(data); + // const settings = getSttg(data); + if (payload === null) { + return null; + } + const cueHtml = toHTML( + { + start: startTime, + end: endTime, + settings: {}, + header: undefined, + payload: [utf8ToStr(payload)], + }, + { + classes: {}, + global: undefined, + }, + ); + + // XXX TODO: + // if (settings) { + // const parser = new shaka.util.TextParser(settings); + // + // let word = parser.readWord(); + // + // while (word) { + // // TODO: Check WebVTTConfigurationBox for region info. + // if ( + // !shaka.text.VttTextParser.parseCueSetting(cue, word, /* VTTRegions= */ []) + // ) { + // shaka.log.warning( + // "VTT parser encountered an invalid VTT setting: ", + // word, + // " The setting will be ignored.", + // ); + // } + // + // parser.skipWhitespace(); + // word = parser.readWord(); + // } + // } + + return cueHtml; +} +/** + * Returns the content of the first "payl" box encountered in the given ISOBMFF + * data. + * Returns null if not found. 
+ * @param {Uint8Array} buffer + * @returns {Uint8Array|null} + */ +function getPayl(buf: Uint8Array): Uint8Array | null { + return getBoxContent(buf, 0x7061796c /* "payl" */); +} +// function getIden(buf: Uint8Array): Uint8Array | null { +// return getBoxContent(buf, 0x6964656e /* "iden" */); +// } +// function getSttg(buf: Uint8Array): Uint8Array | null { +// return getBoxContent(buf, 0x73747467 /* "sttg" */); +// } diff --git a/src/parsers/texttracks/webvtt/native/parse_vtt_to_cues.ts b/src/parsers/texttracks/webvtt/native/parse_vtt_to_cues.ts index d531fedc58..e6af28476c 100644 --- a/src/parsers/texttracks/webvtt/native/parse_vtt_to_cues.ts +++ b/src/parsers/texttracks/webvtt/native/parse_vtt_to_cues.ts @@ -21,6 +21,8 @@ import type { ICompatVTTCue } from "../../../../compat/browser_compatibility_types"; import isVTTCue from "../../../../compat/is_vtt_cue"; +import bufferSourceToUint8 from "../../../../utils/buffer_source_to_uint8"; +import { utf8ToStr } from "../../../../utils/string_parsing"; import getCueBlocks from "../get_cue_blocks"; import parseCueBlock from "../parse_cue_block"; import { getFirstLineAfterHeader } from "../utils"; @@ -34,14 +36,24 @@ import toNativeCue from "./to_native_cue"; /** * Parse whole WEBVTT file into an array of cues, to be inserted in a video's * TrackElement. - * @param {string} vttStr + * @param {string|BufferSource} input + * @param {Number} _timescale * @param {Number} timeOffset * @returns {Array.} */ export default function parseVTTStringToVTTCues( - vttStr: string, + input: string | BufferSource, + _timescale: number, timeOffset: number, ): Array { + let vttStr: string; + if (typeof input !== "string") { + // Assume UTF-8 + // TODO: detection? + vttStr = utf8ToStr(bufferSourceToUint8(input)); + } else { + vttStr = input; + } // WEBVTT authorize CRLF, LF or CR as line terminators const lines = vttStr.split(/\r\n|\n|\r/); diff --git a/src/tools/TextTrackRenderer/text_track_renderer.ts b/src/tools/TextTrackRenderer/text_track_renderer.ts index 4376a5f442..119585f366 100644 --- a/src/tools/TextTrackRenderer/text_track_renderer.ts +++ b/src/tools/TextTrackRenderer/text_track_renderer.ts @@ -24,6 +24,11 @@ export interface ISetTextTrackArguments { data: string; /** The format the text track is in (e.g. "ttml" or "vtt") */ type: string; + /** + * Optional timescale data context that is used to convert timing information + * into seconds. + */ + timescale: number | null; /** Offset, in seconds, that will be added to each subtitle's start and end time. */ timeOffset?: number; /** @@ -81,6 +86,7 @@ export default class TextTrackRenderer { chunk: { start: 0, end: Number.MAX_VALUE, + timescale: args.timescale, data: args.data, language: args.language, type: args.type, diff --git a/src/transports/dash/text_parser.ts b/src/transports/dash/text_parser.ts index ce08275bb1..b849989d28 100644 --- a/src/transports/dash/text_parser.ts +++ b/src/transports/dash/text_parser.ts @@ -108,6 +108,7 @@ function parseISOBMFFEmbeddedTextTrack( const chunkData = getISOBMFFEmbeddedTextTrackData( context, chunkBytes, + initTimescale, chunkInfos, isChunked, ); @@ -127,6 +128,7 @@ function parseISOBMFFEmbeddedTextTrack( * Parse TextTrack data when it is in plain text form. * * @param {ArrayBuffer|Uint8Array|string} data - The segment data. + * @param {number|undefined} initTimescale * @param {boolean} isChunked - If `true`, the `data` may contain only a * decodable subpart of the full data in the linked segment. 
* @param {Object} context - Object describing the context of the given @@ -136,6 +138,7 @@ function parseISOBMFFEmbeddedTextTrack( */ function parsePlainTextTrack( data: ArrayBuffer | Uint8Array | string, + initTimescale: number | undefined, isChunked: boolean, context: ISegmentContext, ): @@ -162,7 +165,12 @@ function parsePlainTextTrack( } else { textTrackData = data; } - const chunkData = getPlainTextTrackData(context, textTrackData, isChunked); + const chunkData = getPlainTextTrackData( + context, + textTrackData, + initTimescale, + isChunked, + ); return { segmentType: "media", chunkData, @@ -244,7 +252,7 @@ export default function generateTextTrackParser({ __priv_patchLastSegmentInSidx, ); } else { - return parsePlainTextTrack(data, isChunked, context); + return parsePlainTextTrack(data, initTimescale, isChunked, context); } }; } diff --git a/src/transports/local/text_parser.ts b/src/transports/local/text_parser.ts index 2949a9c6bc..f8b7dd6247 100644 --- a/src/transports/local/text_parser.ts +++ b/src/transports/local/text_parser.ts @@ -78,6 +78,7 @@ function parseISOBMFFEmbeddedTextTrack( const chunkData = getISOBMFFEmbeddedTextTrackData( context, chunkBytes, + initTimescale, chunkInfos, isChunked, ); @@ -96,6 +97,7 @@ function parseISOBMFFEmbeddedTextTrack( /** * Parse TextTrack data when it is in plain text form. * @param {ArrayBuffer|Uint8Array|string} data - The segment data. + * @param {number|undefined} initTimescale * @param {boolean} isChunked - If `true`, the `data` may contain only a * decodable subpart of the full data in the linked segment. * @param {Object} context - Object describing the context of the given @@ -105,6 +107,7 @@ function parseISOBMFFEmbeddedTextTrack( */ function parsePlainTextTrack( data: string | Uint8Array | ArrayBuffer, + initTimescale: number | undefined, isChunked: boolean, context: ISegmentContext, ): @@ -130,7 +133,12 @@ function parsePlainTextTrack( } else { textTrackData = data; } - const chunkData = getPlainTextTrackData(context, textTrackData, isChunked); + const chunkData = getPlainTextTrackData( + context, + textTrackData, + initTimescale, + isChunked, + ); const chunkOffset = segment.timestampOffset ?? 0; return { segmentType: "media", @@ -192,6 +200,6 @@ export default function textTrackParser( } else if (containerType === "mp4") { return parseISOBMFFEmbeddedTextTrack(data, isChunked, context, initTimescale); } else { - return parsePlainTextTrack(data, isChunked, context); + return parsePlainTextTrack(data, initTimescale, isChunked, context); } } diff --git a/src/transports/smooth/pipelines.ts b/src/transports/smooth/pipelines.ts index 3378391d5e..2948c1639f 100644 --- a/src/transports/smooth/pipelines.ts +++ b/src/transports/smooth/pipelines.ts @@ -411,6 +411,7 @@ export default function (transportOptions: ITransportOptions): ITransportPipelin data: _sdData, start: segmentStart, end: segmentEnd, + timescale: initTimescale ?? null, language, }, chunkSize, diff --git a/src/transports/types.ts b/src/transports/types.ts index 2b7a137ca3..fbac565b4a 100644 --- a/src/transports/types.ts +++ b/src/transports/types.ts @@ -419,9 +419,11 @@ export interface IChunkTimeInfo { } /** Text track segment data, once parsed. */ -export interface ITextTrackSegmentData { +export interface ITextTrackSegmentData< + T extends string | BufferSource = string | BufferSource, +> { /** The text track data, in the format indicated in `type`. 
*/ - data: string; + data: T; /** The format of `data` (examples: "ttml", "srt" or "vtt") */ type: string; /** @@ -430,6 +432,11 @@ export interface ITextTrackSegmentData { * be parsed. */ language?: string | undefined; + /** + * Optional timescale data context that is used to convert timing information + * found inside the segment into seconds. + */ + timescale: number | null; /** start time from which the segment apply, in seconds. */ start?: number | undefined; /** end time until which the segment apply, in seconds. */ @@ -469,7 +476,10 @@ export interface ITransportAudioVideoSegmentPipeline { export interface ITransportTextSegmentPipeline { loadSegment: ISegmentLoader; - parseSegment: ISegmentParser; + parseSegment: ISegmentParser< + ILoadedTextSegmentFormat, + ITextTrackSegmentData | null + >; } export type ITransportSegmentPipeline = diff --git a/src/transports/utils/parse_text_track.ts b/src/transports/utils/parse_text_track.ts index 96e0db180b..83902e177d 100644 --- a/src/transports/utils/parse_text_track.ts +++ b/src/transports/utils/parse_text_track.ts @@ -83,7 +83,8 @@ export function getPlainTextTrackFormat( /** * @param {Object} content - * @param {ArrayBuffer|UInt8Array|null} chunkData + * @param {ArrayBuffer|UInt8Array|null} chunkBytes + * @param {number|undefined} initTimescale * @param {Object|null} chunkInfos * @param {boolean} isChunked * @returns {Object|null} @@ -99,6 +100,7 @@ export function getISOBMFFEmbeddedTextTrackData( language?: string | undefined; }, chunkBytes: Uint8Array, + initTimescale: number | undefined, chunkInfos: IChunkTimeInfo | null, isChunked: boolean, ): ITextTrackSegmentData | null { @@ -124,20 +126,34 @@ export function getISOBMFFEmbeddedTextTrackData( } const type = getISOBMFFTextTrackFormat(codecs); - const textData = extractTextTrackFromISOBMFF(chunkBytes); - return { data: textData, type, language, start: startTime, end: endTime }; + let textData: string | BufferSource; + if (codecs === "wvtt") { + // XXX TODO: check if WEBVTT header first? + textData = chunkBytes; + } else { + textData = extractTextTrackFromISOBMFF(chunkBytes); + } + return { + data: textData, + type, + language, + start: startTime, + end: endTime, + timescale: initTimescale ?? null, + }; } /** - * @param {Object} content - * @param {ArrayBuffer|UInt8Array|null} chunkData - * @param {Object|null} chunkInfos + * @param {Object} context + * @param {ArrayBuffer|UInt8Array|null} textTrackData + * @param {number|undefined} initTimescale * @param {boolean} isChunked * @returns {Object|null} */ export function getPlainTextTrackData( context: ISegmentContext, textTrackData: string, + initTimescale: number | undefined, isChunked: boolean, ): ITextTrackSegmentData | null { const { segment } = context; @@ -157,5 +173,12 @@ export function getPlainTextTrackData( } const type = getPlainTextTrackFormat(context.codecs, context.mimeType); - return { data: textTrackData, type, language: context.language, start, end }; + return { + data: textTrackData, + type, + language: context.language, + start, + end, + timescale: initTimescale ?? null, + }; } diff --git a/src/utils/buffer_source_to_uint8.ts b/src/utils/buffer_source_to_uint8.ts new file mode 100644 index 0000000000..b03b8adec8 --- /dev/null +++ b/src/utils/buffer_source_to_uint8.ts @@ -0,0 +1,14 @@ +/** + * Convert a vague "BufferSource" binary data into a more exploitable and known + * `Uint8Array`. 
+ * @param {BufferSource} bs
+ * @returns {Uint8Array}
+ */
+export default function bufferSourceToUint8(bs: BufferSource): Uint8Array {
+  if (bs instanceof Uint8Array) {
+    return bs;
+  } else if (bs instanceof ArrayBuffer) {
+    return new Uint8Array(bs);
+  }
+  return new Uint8Array(bs.buffer, bs.byteOffset, bs.byteLength);
+}
diff --git a/src/utils/byte_parsing.ts b/src/utils/byte_parsing.ts
index 0d3bff9a6e..2f2ce9465f 100644
--- a/src/utils/byte_parsing.ts
+++ b/src/utils/byte_parsing.ts
@@ -82,6 +82,19 @@ function be4toi(bytes: Uint8Array, offset: number): number {
   );
 }

+/**
+ * Translate groups of 4 big-endian bytes representing a two's complement
+ * signed integer into that value.
+ * @param {Uint8Array} bytes
+ * @param {Number} offset - The offset (from the start of the given array)
+ * @returns {Number}
+ */
+function be4toiSigned(bytes: Uint8Array, offset: number): number {
+  // `be4toi` reads the four big-endian bytes as an unsigned integer; `| 0`
+  // (ToInt32) then reinterprets that value as a two's complement signed one.
+  return be4toi(bytes, offset) | 0;
+}
+
 /**
  * Translate groups of 8 big-endian bytes to Integer.
  * @param {Uint8Array} bytes
@@ -260,6 +273,7 @@ export {
   be2toi,
   be3toi,
   be4toi,
+  be4toiSigned,
   be8toi,
   le2toi,
   le4toi,
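
The sketches below are illustrative only and not part of the patch; file paths, byte values and variable names are assumptions. First, why a signed 4-byte reader is introduced at all: version 1 `trun` boxes encode `sample_composition_time_offset` as a signed 32-bit integer, which the existing unsigned `be4toi` would misread.

```ts
import { be4toi, be4toiSigned } from "./src/utils/byte_parsing";

// Hypothetical version-1 `trun` composition offset of -2000, big-endian encoded.
const bytes = new Uint8Array([0xff, 0xff, 0xf8, 0x30]);

console.log(be4toi(bytes, 0));       // 4294965296 — unsigned reading
console.log(be4toiSigned(bytes, 0)); // -2000 — two's complement reading
```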
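Next, a rough sketch of how the new fragmented-WebVTT path could be exercised once the branch (still marked `XXX TODO` in the patch) settles. The segment bytes, the 1000-unit timescale taken from the init segment, and the import path are assumptions for illustration.

```ts
import parseWebVTT from "./src/parsers/texttracks/webvtt/html/parse_webvtt_to_div";

// `wvttSegment` stands for the raw ISOBMFF bytes of a "wvtt" media segment
// (a `moof` + `mdat` whose samples contain `vttc`/`vtte` boxes).
declare const wvttSegment: Uint8Array;

// Binary input is routed to the MP4 branch; the timescale (e.g. 1000, read
// from the init segment's mdhd) converts tfdt/trun timing into seconds.
const cues = parseWebVTT(wvttSegment, 1000, /* timeOffset */ 0);
for (const cue of cues) {
  console.log(cue.start, cue.end, cue.element.textContent);
}
```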
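Finally, a hedged sketch of how the new `timescale` field surfaces in the `TextTrackRenderer` tool, assuming its documented `addParsers`/`setTextTrack` API. Since the field is declared without `?`, existing `setTextTrack` callers would now have to pass `timescale: null` explicitly (or the field could be made optional).

```ts
import TextTrackRenderer, { SRT_PARSER } from "rx-player/tools/TextTrackRenderer";

const videoElement = document.querySelector("video") as HTMLVideoElement;
const textTrackElement = document.getElementById("subtitles") as HTMLElement;

TextTrackRenderer.addParsers([SRT_PARSER]);
const renderer = new TextTrackRenderer({ videoElement, textTrackElement });

renderer.setTextTrack({
  data: "1\n00:00:01,000 --> 00:00:03,000\nHello\n",
  type: "srt",
  // Plain-text formats already express timing in seconds, so no external
  // timescale is needed here.
  timescale: null,
  language: "en",
});
```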