diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..1abcdaf --- /dev/null +++ b/.editorconfig @@ -0,0 +1,28 @@ +root = true + +[*] +end_of_line = lf +indent_size = 4 +indent_style = space +insert_final_newline = true +trim_trailing_whitespace = true +charset = utf-8 + +[*.yml] +indent_size = 2 + +[*.json] +indent_size = 2 + +[*.html] +indent_size = 2 + +[*.css] +indent_size = 2 + +[.*] +indent_size = 2 + +[*.md] +trim_trailing_whitespace = false + diff --git a/package.json b/package.json index d86d30f..6f637a2 100644 --- a/package.json +++ b/package.json @@ -17,7 +17,8 @@ "build:prod": "cross-env NODE_ENV=production webpack --config ./webpack.config.js --progress --profile --color --display-error-details --display-cached --bail", "clean": "npm cache clear && rimraf -- dist", "test": "npm run build:dev && mocha --compilers js:babel-core/register --colors ./test/*.spec.js", - "test:watch": "mocha --compilers js:babel-core/register --colors -w ./test/*.spec.js" + "test:watch": "mocha --compilers js:babel-core/register --colors -w ./test/*.spec.js", + "lint": "tslint --project ." }, "author": "jesus@epiclabs.io", "license": "ISC", @@ -39,7 +40,6 @@ "sass-loader": "^6.0.6", "style-loader": "^0.18.2", "tslint": "^5.1.0", - "tslint-loader": "^3.5.2", "typescript": "^2.2.2", "webpack": "^2.4.1", "webpack-build-notifier": "^0.1.14", diff --git a/samples/.editorconfig b/samples/.editorconfig new file mode 100644 index 0000000..4ae90dd --- /dev/null +++ b/samples/.editorconfig @@ -0,0 +1,2 @@ +[*.js] +indent_size = 2 diff --git a/samples/index.html b/samples/index.html index c9f296d..b004be2 100644 --- a/samples/index.html +++ b/samples/index.html @@ -1,47 +1,20 @@ - + - <%= htmlWebpackPlugin.options.title %> + Inspector.js - media file viewer and analyzer - - + -

-Basic sample
+Inspector.js - media file viewer and analyzer
+Sample media assets:
- - + + - \ No newline at end of file + diff --git a/samples/main.js b/samples/main.js new file mode 100644 index 0000000..1b7ab94 --- /dev/null +++ b/samples/main.js @@ -0,0 +1,248 @@ +document.getElementById('fileInput') + .addEventListener('change', onFileChosen, false); + +var fileChosen = null; +function onFileChosen(event) { + fileChosen = event.target.files[0]; +} + +function abortAllLoading() { + document.getElementById('loadProgress').value = 0; + if (request) { + request.onload = null; + request.onprogress = null; + request.onerror = null; + request.abort(); + request = null; + } + if (reader) { + reader.onload = null; + reader.onprogress = null; + reader.onerror = null; + reader.abort(); + reader = null; + } +} + +var request = null; +function loadRemoteMedia(mediaUrl, onLoaded) { + abortAllLoading(); + + var req = request = new XMLHttpRequest(); + req.open('GET', mediaUrl, true); + req.responseType = 'arraybuffer'; + + req.onload = function () { + + if (req.status >= 400) { + alert('An error (' + req.status + ') happened trying to load the remote resource: ' + mediaUrl); + abortAllLoading(); + return; + } + + var arrayBuffer = req.response; + onLoaded(arrayBuffer); + }; + req.onprogress = function (event) { + document.getElementById('loadProgress').value = (event.loaded / event.total) * 100; + } + req.onerror = function () { + alert('A fatal error happened trying to load the remote resource: ' + mediaUrl); + } + req.send(null); +} + +var reader = null; +function loadLocalMedia(file, onLoaded) { + abortAllLoading(); + + reader = new FileReader(); + reader.onload = function onFileRead(event) { + onLoaded(reader.result); + }; + reader.onprogress = function(event) { + document.getElementById('loadProgress').value = (event.loaded / event.total) * 100; + } + reader.onerror = function () { + alert('A fatal error happened trying to load the file resource: ' + file); + } + reader.readAsArrayBuffer(file); +} + +function inspectMedia(type) { + clearTracksInfo(); + clearAtomsTree(); + clearAtomDetails(); + + switch (type) { + case 'local': + if (!fileChosen) { + window.alert('Please choose a file...'); + return; + } + document.getElementById('uri').value = fileChosen.name; + loadLocalMedia(fileChosen, onMediaLoaded.bind(null, fileChosen.name)); + break; + case 'remote-sample-media': + var mediaUrl = document.getElementById("mediaUrl").value; + if (!mediaUrl) { + window.alert('Please choose a URL...'); + return; + } + document.getElementById('uri').value = mediaUrl; + loadRemoteMedia(mediaUrl, onMediaLoaded.bind(null, mediaUrl)); + break; + case 'remote-input-url': + var mediaUrl = document.getElementById('urlInput').value; + if (!mediaUrl) { + window.alert('Please input a URL...'); + return; + } + document.getElementById('uri').value = mediaUrl; + loadRemoteMedia(mediaUrl, onMediaLoaded.bind(null, mediaUrl)); + break; + } +} + +function onMediaLoaded(uri, arrayBuffer) { + document.getElementById('totalBytes') + .value = arrayBuffer.byteLength; + + var byteArray = new Uint8Array(arrayBuffer); + + var demuxer = createDemuxer(uri); + + demuxer.append(byteArray); + demuxer.end(); + + updateAtomsInfo(demuxer); + updateTracksInfo(demuxer); +} + +function createDemuxer(uri) { + // Find out demuxer looking at file extension. 
"Enough" good for this sample + var ext = uri.split('.').pop(); + if (ext === 'ts') { + return inspectorjs.createMpegTSDemuxer(); + } else if (ext === 'webm') { + return inspectorjs.createWebMDemuxer(); + } else { // TODO: add regex here + return inspectorjs.createMp4Demuxer(); + } +} + +function updateTracksInfo(demuxer) { + var el = document.getElementById('tracks'); + for (var trackId in demuxer.tracks) { + addTrackInfo(el, demuxer.tracks[trackId]); + } +} + +function addTrackInfo(el, track) { + var trackEl = document.createElement('div'); + + // Add track general info + trackEl.innerHTML = '

Track # ' + track.id + ' - ' + track.mimeType + '

'; + + // Add frame details + var framesEl = document.createElement('ul'); + for (var frame of track.frames) { + var frameEl = document.createElement('li'); + frameEl.innerHTML + = 'Frame: type = ' + frame.frameType + ' | ' + + '[ PTS / DTS = ' + + frame.getDecodingTimestampInSeconds().toFixed(3) + + ' / ' + + frame.getPresentationTimestampInSeconds().toFixed(3) + + ' ( ' + + (frame.getDurationInSeconds().toFixed(3) || 0) + ' )' + + ' @' + frame.bytesOffset + + ' -> ' + + (frame.bytesOffset + frame.size - 1) + + ' ( ' + frame.size + ' ) ' + + ' ]' + ; + + framesEl.appendChild(frameEl); + } + trackEl.appendChild(framesEl); + + // Add track info to dom + el.appendChild(trackEl); +} + +function updateAtomsInfo(demuxer) { + // Only makes sense for MP4 files + if (demuxer.atoms) { + var el = document.getElementById('atoms'); + addAtoms(el, demuxer.atoms); + } +} + +// Add information about MP4 atoms +function addAtoms(parent, atoms) { + if (!atoms || atoms.length === 0) return; + + var containerEl = document.createElement('ul'); + for (var atom of atoms) { + var atomEl = document.createElement('li'); + atomEl.innerHTML = atom.type + ', ' + atom.size + ' bytes'; + atomEl.onclick = renderAtomDetails.bind(null, atom); + + containerEl.appendChild(atomEl); + + if (atom.atoms && atom.atoms.length > 0) { + addAtoms(atomEl, atom.atoms); + } + } + + parent.appendChild(containerEl); +} + +function renderAtomDetails(atom, event) { + event.stopPropagation(); + var el = document.getElementById('details'); + clearAtomDetails(); + + var containerEl = document.createElement('ul'); + for (var p in atom) { + if (atom.hasOwnProperty(p) && shouldRenderAtom(p, atom)) { + var item = document.createElement('li'); + item.innerHTML = '

' + p + ":

" + JSON.stringify(atom[p], null, 4) + '

'; + containerEl.appendChild(item); + //containerEl.appendChild(document.createElement('br')); + } + } + + el.appendChild(containerEl); +} + +function clearTracksInfo() { + var el = document.getElementById('tracks'); + + // Remove any child of tracks element + while (el.firstChild) { + el.removeChild(el.firstChild); + } +} + +function clearAtomsTree() { + var el = document.getElementById('atoms'); + + // Remove any child of tracks element + while (el.firstChild) { + el.removeChild(el.firstChild); + } +} + +function clearAtomDetails() { + var el = document.getElementById('details'); + while (el.firstChild) { + el.removeChild(el.firstChild); + } +} + +function shouldRenderAtom(p, atom) { + return p !== 'atoms' && p !== 'containerDataOffset' && p !== 'constructor'; + //&& !Array.isArray(atom[p]); +} diff --git a/samples/style.css b/samples/style.css new file mode 100644 index 0000000..217f44e --- /dev/null +++ b/samples/style.css @@ -0,0 +1,61 @@ +body { + font-family:'Segoe UI', Tahoma, Geneva, Verdana, sans-serif +} + +input { + font-family:'Segoe UI', Tahoma, Geneva, Verdana, sans-serif +} + +input[type=button] { + font-size: 1em; + background-color: #94bf1e; +} + +input[type=file] { + font-size: 1em; + border: 1px solid gray; +} + +input[type=input] { + font-size: 1em; +} + +#viewer { + width: 100%; + overflow-x: auto; + position: relative; + scroll-behavior: auto; +} + +#atoms-info { + margin-top: 40px; + background-color: #94bf1e; + color: #ffffff; + border: 1px solid black; + display: flex; + float:left; + /*width: 50%;*/ +} + +.atom_column { + flex: 50%; +} + +#atoms li { + cursor: pointer; +} + +#details { + background-color: #000000; +} + +#tracks { + float:right; + /*width: 50%;*/ + margin-top: 20px; +} + +#tracks li { + font-size: 0.8em; +} + diff --git a/src/codecs/h264/nal-units.ts b/src/codecs/h264/nal-units.ts index 4bb7251..2f023df 100644 --- a/src/codecs/h264/nal-units.ts +++ b/src/codecs/h264/nal-units.ts @@ -1,9 +1,90 @@ import { Size, FrameRate } from '../video-types'; +export enum NAL_UNIT_TYPE { + SLICE = 1, + DPA, + DPB, + DPC, + IDR, + SEI, + SPS, + PPS, + AUD, + END_SEQUENCE, + END_STREAM, + FILLER_DATA, + SPS_EXT, + PREFIX, + SUBSET_SPS, + RESERVED_16, + RESERVED_17, + RESERVED_18, + SLICE_AUX_PIC, + SLICE_EXT, + SLICE_EXT_DEPTH, + RESERVED_22, + RESERVED_23 +} + +export enum SLICE_TYPE { + P = 0, + B, + I, + SP, + SI +} + +export enum FRAME_TYPE { + I = 'I', + P = 'P', + B = 'B', + SI = 'SI', + SP = 'SP', + NONE = '' +} + +export function mapNaluSliceToFrameType(sliceType: SLICE_TYPE): FRAME_TYPE { + if (sliceType > 4) { + sliceType = sliceType - 5; + } + switch (sliceType) { + case SLICE_TYPE.B: + return FRAME_TYPE.B; + case SLICE_TYPE.I: + return FRAME_TYPE.I; + case SLICE_TYPE.P: + return FRAME_TYPE.P; + case SLICE_TYPE.SI: + return FRAME_TYPE.SI; + case SLICE_TYPE.SP: + return FRAME_TYPE.SP; + default: + return FRAME_TYPE.NONE; + } +} + export class Sps { - constructor (public profile: string, public level: string, public bitDepth: number, - public chromaFormat: number, chromaFormatStr: string, public frameRate: FrameRate, - public sar: Size, public codecSize: Size, public presentSize: Size ) { + constructor ( + public id: number, + public profile: string, + public profileIdc: number, + public level: string, + public levelIdc: number, + public bitDepth: number, + public chromaFormat: number, + public chromaFormatStr: string, + public frameRate: FrameRate, + public sar: Size, + public codecSize: Size, + public presentSize: Size ) { // do nothing } } + +export 
class Pps { + constructor( + public id: number, + public spsId: number, + public entropyCodingModeFlag: boolean, + ) {} +} diff --git a/src/codecs/h264/sps-parser.ts b/src/codecs/h264/param-set-parser.ts similarity index 66% rename from src/codecs/h264/sps-parser.ts rename to src/codecs/h264/param-set-parser.ts index 7fba59c..7e72157 100644 --- a/src/codecs/h264/sps-parser.ts +++ b/src/codecs/h264/param-set-parser.ts @@ -1,17 +1,68 @@ import { BitReader } from '../../utils/bit-reader'; -import { Sps } from './nal-units'; +import { Sps, Pps } from './nal-units'; import { Size, FrameRate } from '../video-types'; -export class SPSParser { - public static parseSPS(data: Uint8Array): Sps { +export class H264ParameterSetParser { + + static getProfileString(profile_idc: number): string { + switch (profile_idc) { + case 66: + return 'Baseline'; + case 77: + return 'Main'; + case 88: + return 'Extended'; + case 100: + return 'High'; + case 110: + return 'High10'; + case 122: + return 'High422'; + case 244: + return 'High444'; + default: + return 'Unspecified Profile-IDC value: ' + profile_idc; + } + } + + static getLevelString(level_idc: number): string { + return (level_idc / 10).toFixed(1); + } + + static getChromaFormatString(chroma: number): string { + switch (chroma) { + case 420: + return '4:2:0'; + case 422: + return '4:2:2'; + case 444: + return '4:4:4'; + default: + return 'Unspecified chroma-format value: ' + chroma; + } + } + + static parsePPS(data: Uint8Array): Pps { + const gb: BitReader = new BitReader(data); + + const id: number = gb.readUEG(); + const spsId: number = gb.readUEG(); + const entropyCodingMode: boolean = gb.readBool(); + + return new Pps(id, spsId, entropyCodingMode); + } + + static parseSPS(data: Uint8Array): Sps { let gb: BitReader = new BitReader(data); const profile_idc: number = gb.readByte(); gb.readByte(); const level_idc: number = gb.readByte(); - gb.readUEG(); - const profile_string: string = SPSParser.getProfileString(profile_idc); - const level_string: string = SPSParser.getLevelString(level_idc); + const seq_parameter_set_id: number = gb.readUEG(); + + const profile_string: string = H264ParameterSetParser.getProfileString(profile_idc); + const level_string: string = H264ParameterSetParser.getLevelString(level_idc); + let chroma_format_idc: number = 1; let chroma_format: number = 420; let chroma_format_table: number[] = [0, 420, 422, 444]; @@ -38,9 +89,9 @@ export class SPSParser { for (let i: number = 0; i < scaling_list_count; i++) { if (gb.readBool()) { if (i < 6) { - SPSParser.skipScalingList(gb, 16); + H264ParameterSetParser.skipScalingList(gb, 16); } else { - SPSParser.skipScalingList(gb, 64); + H264ParameterSetParser.skipScalingList(gb, 64); } } } @@ -84,11 +135,19 @@ export class SPSParser { frame_crop_bottom_offset = gb.readUEG(); } - let sar_width: number = 1, sar_height: number = 1; - let fps: number = 0, fps_fixed: boolean = true, fps_num: number = 0, fps_den: number = 0; + let sar_width: number = 1; + let sar_height: number = 1; + let fps: number = 0, + fps_fixed: boolean = true, + fps_num: number = 0, + fps_den: number = 0; + + // @see https://sourceforge.net/p/h264bitstream/code/HEAD/tree/trunk/h264bitstream/h264_stream.c#l363 let vui_parameters_present_flag: boolean = gb.readBool(); + if (vui_parameters_present_flag) { + if (gb.readBool()) { // aspect_ratio_info_present_flag const aspect_ratio_idc: number = gb.readByte(); const sar_w_table: number[] = [1, 12, 10, 16, 40, 24, 20, 32, 80, 18, 15, 64, 160, 4, 3, 2]; @@ -103,28 +162,54 @@ export 
class SPSParser { } } - if (gb.readBool()) { + if (gb.readBool()) { //overscan_info_present gb.readBool(); } - if (gb.readBool()) { - gb.readBits(4); + + if (gb.readBool()) { // video_signal_type_present + gb.readBits(3); + gb.readBool(); if (gb.readBool()) { - gb.readBits(24); + gb.readBits(8); + gb.readBits(8); + gb.readBits(8); } } - if (gb.readBool()) { + + if (gb.readBool()) { // chroma_loc_info_present gb.readUEG(); gb.readUEG(); } - if (gb.readBool()) { + + if (gb.readBool()) { // timing_info_present + const num_units_in_tick: number = gb.readBits(32); const time_scale: number = gb.readBits(32); + fps_fixed = gb.readBool(); fps_num = time_scale; - fps_den = num_units_in_tick * 2; + fps_den = num_units_in_tick; fps = fps_num / fps_den; } + + /* + sps->timing_info_present_flag = get_bits1(gb); + if (sps->timing_info_present_flag) { + unsigned num_units_in_tick = get_bits_long(gb, 32); + unsigned time_scale = get_bits_long(gb, 32); + if (!num_units_in_tick || !time_scale) { + av_log(avctx, AV_LOG_ERROR, + "time_scale/num_units_in_tick invalid or unsupported (%u/%u)\n", + time_scale, num_units_in_tick); + sps->timing_info_present_flag = 0; + } else { + sps->num_units_in_tick = num_units_in_tick; + sps->time_scale = time_scale; + } + sps->fixed_frame_rate_flag = get_bits1(gb); + } + */ } let sarScale: number = 1; @@ -151,50 +236,20 @@ export class SPSParser { const present_width: number = Math.ceil(codec_width * sarScale); - gb.destroy(); - gb = null; - - return new Sps(profile_string, level_string, bit_depth, chroma_format, - SPSParser.getChromaFormatString(chroma_format), new FrameRate(fps_fixed, fps, fps_den, fps_num), - new Size(sar_width, sar_height), new Size(codec_width, codec_height), new Size(present_width, codec_height)); - } - - private static getProfileString(profile_idc: number): string { - switch (profile_idc) { - case 66: - return 'Baseline'; - case 77: - return 'Main'; - case 88: - return 'Extended'; - case 100: - return 'High'; - case 110: - return 'High10'; - case 122: - return 'High422'; - case 244: - return 'High444'; - default: - return 'Unknown'; - } - } - - private static getLevelString(level_idc: number): string { - return (level_idc / 10).toFixed(1); - } - - private static getChromaFormatString(chroma: number): string { - switch (chroma) { - case 420: - return '4:2:0'; - case 422: - return '4:2:2'; - case 444: - return '4:4:4'; - default: - return 'Unknown'; - } + return new Sps( + seq_parameter_set_id, + profile_string, + profile_idc, + level_string, + level_idc, + bit_depth, + chroma_format, + H264ParameterSetParser.getChromaFormatString(chroma_format), + new FrameRate(fps_fixed, fps, fps_den, fps_num), + new Size(sar_width, sar_height), + new Size(codec_width, codec_height), + new Size(present_width, codec_height) + ); } private static skipScalingList(gb: BitReader, count: number): void { diff --git a/src/codecs/video-types.ts b/src/codecs/video-types.ts index 3500c89..49c1d51 100644 --- a/src/codecs/video-types.ts +++ b/src/codecs/video-types.ts @@ -1,6 +1,9 @@ export class FrameRate { - constructor (public fixed: boolean, public fps: number, - public fpsDen: number, public fpsNum: number) { + constructor ( + public fixed: boolean, + public fps: number, + public fpsDen: number, + public fpsNum: number) { // do nothing } } diff --git a/src/demuxer/frame.ts b/src/demuxer/frame.ts index 2031a0e..a67d1f9 100644 --- a/src/demuxer/frame.ts +++ b/src/demuxer/frame.ts @@ -1,42 +1,50 @@ -export const MICROSECOND_TIMESCALE = 1000000; +import { FRAME_TYPE } from 
"../codecs/h264/nal-units"; export class Frame { - // fixme: should be an enum - public static IDR_FRAME: string = 'I'; - public static P_FRAME: string = 'P'; - public static B_FRAME: string = 'B'; - - private presentationTimeUs: number = 0; - constructor ( - public frameType: string, - public timeUs: number, - public size: number, - public duration: number = NaN, - public bytesOffset: number = -1, - presentationTimeOffsetUs: number = 0 + public readonly frameType: FRAME_TYPE, + public readonly dts: number, + private _cto: number, + public readonly duration: number, + public readonly size: number, + private _bytesOffset: number = NaN ) { - this.setPresentationTimeOffsetUs(presentationTimeOffsetUs); - } - - getDecodingTimeUs() { - return this.timeUs; + if (dts < 0 || !Number.isSafeInteger(dts)) { + throw new Error(`Frame: DTS has to be positive safe-integer value but is ${dts}`); + } + if (size < 0 || !Number.isSafeInteger(size)) { + throw new Error(`Frame: Size has to be positive safe-integer value but is ${size}`); + } + if (duration < 0 || !Number.isSafeInteger(duration)) { + throw new Error(`Frame: Duration has to be positive safe-integer value but is ${duration}`); + } + this.setPresentationTimeOffset(_cto); } - getPresentationTimeUs(): number { - return this.presentationTimeUs; + get bytesOffset() { + return this._bytesOffset; } - setPresentationTimeOffsetUs(presentationTimeOffsetUs: number) { - this.presentationTimeUs = this.timeUs + presentationTimeOffsetUs; + get cto() { + return this._cto; } - getPresentationTimestampInSeconds(): number { - return this.getPresentationTimeUs() / MICROSECOND_TIMESCALE; + /** + * aka "CTO" + * @param cto + */ + setPresentationTimeOffset(cto: number) { + if (cto < 0 || !Number.isSafeInteger(cto)) { + throw new Error(`Frame: CTO has to be positive safe-integer value but is ${cto}`); + } + this._cto = cto; } - getDecodingTimestampInSeconds() { - return this.timeUs / MICROSECOND_TIMESCALE; + setBytesOffset(bytesOffset: number) { + if (bytesOffset < 0 || !Number.isSafeInteger(bytesOffset)) { + throw new Error(`Frame: Bytes-offset has to be positive safe-integer value but is ${bytesOffset}`); + } + this._bytesOffset = bytesOffset; } } diff --git a/src/demuxer/mp4/atoms/atom.ts b/src/demuxer/mp4/atoms/atom.ts index ea193b1..97147e9 100644 --- a/src/demuxer/mp4/atoms/atom.ts +++ b/src/demuxer/mp4/atoms/atom.ts @@ -80,8 +80,7 @@ export class Atom { public static mfhd: string = 'mfhd'; public static emsg: string = 'emsg'; - constructor (public type: string, public size: number) { - } + constructor (public type: string, public size: number) {} public static isContainerBox(type: string): boolean { return type === Atom.moov || type === Atom.trak || type === Atom.mdia diff --git a/src/demuxer/mp4/atoms/avcC.ts b/src/demuxer/mp4/atoms/avcC.ts index 9af1265..3a34f99 100644 --- a/src/demuxer/mp4/atoms/avcC.ts +++ b/src/demuxer/mp4/atoms/avcC.ts @@ -1,7 +1,7 @@ import ByteParserUtils from '../../../utils/byte-parser-utils'; -import { Atom, ContainerAtom } from './atom'; -import { SPSParser } from '../../../codecs/h264/sps-parser'; -import { Sps } from '../../../codecs/h264/nal-units'; +import { Atom } from './atom'; +import { H264ParameterSetParser } from '../../../codecs/h264/param-set-parser'; +import { Sps, Pps } from '../../../codecs/h264/nal-units'; export class AvcC extends Atom { public version: number; @@ -14,10 +14,13 @@ export class AvcC extends Atom { public sps: Uint8Array[]; public spsParsed: Sps[]; public pps: Uint8Array[]; + public ppsParsed: Pps[]; + 
public data: Uint8Array; public static parse(data: Uint8Array): Atom { const avcC: AvcC = new AvcC(Atom.avcC, data.byteLength); + avcC.data = data; avcC.version = data[0]; avcC.profile = data[1]; avcC.profileCompatibility = data[2]; @@ -35,17 +38,21 @@ export class AvcC extends Atom { avcC.sps.push(sps); offset += spsSize; - avcC.spsParsed.push(SPSParser.parseSPS(sps.subarray(1, spsSize))); + avcC.spsParsed.push(H264ParameterSetParser.parseSPS(sps.subarray(1, spsSize))); } avcC.numOfPictureParameterSets = data[offset] & 0x1f; avcC.pps = []; + avcC.ppsParsed = []; offset++; for (let i: number = 0; i < avcC.numOfPictureParameterSets; i++) { const ppsSize: number = ByteParserUtils.parseUint16(data, offset); offset += 2; + const pps: Uint8Array = new Uint8Array(data.subarray(offset, offset + ppsSize)); avcC.pps.push(new Uint8Array(data.subarray(offset, offset + ppsSize))); offset += ppsSize; + + avcC.ppsParsed.push(H264ParameterSetParser.parsePPS(pps.subarray(1, ppsSize))) } return avcC; diff --git a/src/demuxer/mp4/atoms/ctts.ts b/src/demuxer/mp4/atoms/ctts.ts new file mode 100644 index 0000000..c428e0b --- /dev/null +++ b/src/demuxer/mp4/atoms/ctts.ts @@ -0,0 +1,29 @@ +import ByteParserUtils from '../../../utils/byte-parser-utils'; +import { Atom } from './atom'; + +export class CTimeOffsetToSampleEntry { + constructor(public sampleCount: number, public sampleCTimeOffset: number) { + } +} + +export class Ctts extends Atom { + public version: number; + public flags: Uint8Array; + public cTimeOffsetToSamples: CTimeOffsetToSampleEntry[] = []; + + public static parse(data: Uint8Array): Atom { + const ctts: Ctts = new Ctts(Atom.ctts, data.byteLength); + ctts.version = data[0]; + ctts.flags = data.subarray(1, 4); + ctts.cTimeOffsetToSamples = []; + const entryCount: number = ByteParserUtils.parseUint32(data, 4); + let offset: number = 8; + for (let i: number = 0; i < entryCount; i++) { + ctts.cTimeOffsetToSamples.push(new CTimeOffsetToSampleEntry( + ByteParserUtils.parseUint32(data, offset), + ByteParserUtils.parseUint32(data, offset + 4))); + offset += 8; + } + return ctts; + } +} diff --git a/src/demuxer/mp4/atoms/esds.ts b/src/demuxer/mp4/atoms/esds.ts index 0946b3d..374d22b 100644 --- a/src/demuxer/mp4/atoms/esds.ts +++ b/src/demuxer/mp4/atoms/esds.ts @@ -21,12 +21,16 @@ export class Esds extends Atom { public esId: number; public streamPriority: number; public decoderConfig: DecoderConfig; + public data: Uint8Array; public static parse(data: Uint8Array): Atom { const esds: Esds = new Esds(Atom.esds, data.byteLength); + esds.data = data; + esds.version = data[0]; esds.flags = data.subarray(1, 4); + esds.esId = ByteParserUtils.parseUint16(data, 6); esds.streamPriority = data[8] & 0x1f; esds.decoderConfig = new DecoderConfig( diff --git a/src/demuxer/mp4/atoms/index.ts b/src/demuxer/mp4/atoms/index.ts index e6a075a..41226b9 100644 --- a/src/demuxer/mp4/atoms/index.ts +++ b/src/demuxer/mp4/atoms/index.ts @@ -29,9 +29,13 @@ import { Pssh } from './pssh'; import { HvcC } from './hvcC'; import { Hvc1 } from './hvc1'; import { Hev1 } from './hev1'; +import { Stss } from './stss'; +import { Ctts } from './ctts'; export const boxesParsers: {[type: string] : (data: Uint8Array) => Atom } = { }; +// Q: can this generalized or abstracted as opposed to explicit registration? 
+ boxesParsers[Atom.ftyp] = Ftyp.parse; boxesParsers[Atom.stsd] = Stsd.parse; boxesParsers[Atom.avc1] = Avc1.parse; @@ -44,8 +48,10 @@ boxesParsers[Atom.hdlr] = Hdlr.parse; boxesParsers[Atom.vmhd] = Vmhd.parse; boxesParsers[Atom.dref] = Dref.parse; boxesParsers[Atom.stts] = Stts.parse; +boxesParsers[Atom.stss] = Stss.parse; boxesParsers[Atom.stsc] = Stsc.parse; boxesParsers[Atom.stsz] = Stsz.parse; +boxesParsers[Atom.ctts] = Ctts.parse; boxesParsers[Atom.stco] = Stco.parse; boxesParsers[Atom.smhd] = Smhd.parse; boxesParsers[Atom.mp4a] = Mp4a.parse; diff --git a/src/demuxer/mp4/atoms/mdat.ts b/src/demuxer/mp4/atoms/mdat.ts index 64bbaf1..58b69c7 100644 --- a/src/demuxer/mp4/atoms/mdat.ts +++ b/src/demuxer/mp4/atoms/mdat.ts @@ -3,12 +3,16 @@ import { Atom } from './atom'; export class Mdat extends Atom { + public data: Uint8Array = null; + public static parse(data: Uint8Array): Atom { const mdat: Mdat = new Mdat(Atom.mdat, data.byteLength); - Mdat.parsePayload(data); + mdat.data = data; + //Mdat.parsePayload(data); return mdat; } + /* private static parsePayload(data: Uint8Array): void { let length: number; for (let i: number = 0; i + 4 < data.byteLength; i += length) { @@ -16,12 +20,17 @@ export class Mdat extends Atom { i += 4; if (length <= 0) { - console.log('is this an H264 stream?'); - continue; + //console.log('is this an H264 stream?'); + //continue; + + // let's break here since otherwise this crashes on AAC data + warn('aborted parsing mdat'); + break; } const nalType: number = data[i] & 0x1F; // TODO: do something } } + */ } diff --git a/src/demuxer/mp4/atoms/mvhd.ts b/src/demuxer/mp4/atoms/mvhd.ts index 08e98f1..bed1cff 100644 --- a/src/demuxer/mp4/atoms/mvhd.ts +++ b/src/demuxer/mp4/atoms/mvhd.ts @@ -30,10 +30,11 @@ export class Mvhd extends Atom { mvhd.creationTime = ByteParserUtils.parseIsoBoxDate(ByteParserUtils.parseUint32(data, offset)); offset += 8; mvhd.modificationTime = ByteParserUtils.parseIsoBoxDate(ByteParserUtils.parseUint32(data, offset)); - offset += 4; - mvhd.timescale = ByteParserUtils.parseUint32(data, offset); offset += 8; + mvhd.timescale = ByteParserUtils.parseUint32(data, offset); + offset += 4; mvhd.duration = ByteParserUtils.parseUint32(data, offset); + offset += 8; } else { mvhd.creationTime = ByteParserUtils.parseIsoBoxDate(ByteParserUtils.parseUint32(data, offset)); offset += 4; @@ -42,8 +43,8 @@ export class Mvhd extends Atom { mvhd.timescale = ByteParserUtils.parseUint32(data, offset); offset += 4; mvhd.duration = ByteParserUtils.parseUint32(data, offset); + offset += 4; } - offset += 4; mvhd.rate = ByteParserUtils.parseUint16(data, offset) + ByteParserUtils.parseUint16(data, offset + 2) / 16; offset += 4; diff --git a/src/demuxer/mp4/atoms/sidx.ts b/src/demuxer/mp4/atoms/sidx.ts index 3ae6abb..203b654 100644 --- a/src/demuxer/mp4/atoms/sidx.ts +++ b/src/demuxer/mp4/atoms/sidx.ts @@ -29,8 +29,8 @@ export class Sidx extends Atom { sidx.firstOffset = ByteParserUtils.parseUint32(data, 16); offset = 20; } else { - sidx.earliestPresentationTime = ByteParserUtils.parseLong64(data, 12); - sidx.firstOffset = ByteParserUtils.parseLong64(data, 20); + sidx.earliestPresentationTime = ByteParserUtils.parseUint64(data, 12); + sidx.firstOffset = ByteParserUtils.parseUint64(data, 20); offset = 28; } diff --git a/src/demuxer/mp4/atoms/stsc.ts b/src/demuxer/mp4/atoms/stsc.ts index 379a28e..3dfc672 100644 --- a/src/demuxer/mp4/atoms/stsc.ts +++ b/src/demuxer/mp4/atoms/stsc.ts @@ -2,7 +2,9 @@ import ByteParserUtils from '../../../utils/byte-parser-utils'; import { Atom 
} from './atom'; export class SampleToChunkEntry { - constructor(public firstChunk: number, public samplesPerChunk: number, + constructor( + public firstChunk: number, + public samplesPerChunk: number, public sampleDescriptionIndex: number) { } } diff --git a/src/demuxer/mp4/atoms/stss.ts b/src/demuxer/mp4/atoms/stss.ts new file mode 100644 index 0000000..1106ca7 --- /dev/null +++ b/src/demuxer/mp4/atoms/stss.ts @@ -0,0 +1,23 @@ +import { Atom } from "./atom"; +import ByteParserUtils from "../../../utils/byte-parser-utils"; + +export class Stss extends Atom { + + public version: number; + public flags: Uint8Array; + public syncSampleNumbers: number[] = []; + + public static parse(data: Uint8Array): Atom { + const stss: Stss = new Stss(Atom.stss, data.byteLength); + stss.version = data[0]; + stss.flags = data.subarray(1, 4); + const entryCount: number = ByteParserUtils.parseUint32(data, 4); + + let offset: number = 8; + for (let i: number = 0; i < entryCount; i++) { + stss.syncSampleNumbers.push(ByteParserUtils.parseUint32(data, offset)); + offset += 8; + } + return stss; + } +} diff --git a/src/demuxer/mp4/atoms/stts.ts b/src/demuxer/mp4/atoms/stts.ts index 155bfa1..e6ad7bf 100644 --- a/src/demuxer/mp4/atoms/stts.ts +++ b/src/demuxer/mp4/atoms/stts.ts @@ -9,7 +9,7 @@ export class TimeToSampleEntry { export class Stts extends Atom { public version: number; public flags: Uint8Array; - public timeToSamples: TimeToSampleEntry[]; + public timeToSamples: TimeToSampleEntry[] = []; public static parse(data: Uint8Array): Atom { const stts: Stts = new Stts(Atom.stts, data.byteLength); diff --git a/src/demuxer/mp4/atoms/tfhd.ts b/src/demuxer/mp4/atoms/tfhd.ts index 884673c..a03ec0c 100644 --- a/src/demuxer/mp4/atoms/tfhd.ts +++ b/src/demuxer/mp4/atoms/tfhd.ts @@ -24,7 +24,7 @@ export class Tfhd extends Atom { let offset: number = 8; if (baseDataOffsetPresent) { - tfhd.baseDataOffset = ByteParserUtils.parseLong64(data, 12); + tfhd.baseDataOffset = ByteParserUtils.parseUint64(data, 12); offset += 8; } if (sampleDescriptionIndexPresent) { diff --git a/src/demuxer/mp4/atoms/trun.ts b/src/demuxer/mp4/atoms/trun.ts index 0536966..7031e5a 100644 --- a/src/demuxer/mp4/atoms/trun.ts +++ b/src/demuxer/mp4/atoms/trun.ts @@ -2,9 +2,15 @@ import ByteParserUtils from '../../../utils/byte-parser-utils'; import { Atom } from './atom'; export class SampleFlags { - constructor(public isLeading: number, public dependsOn: number, public isDependedOn: number, - public hasRedundancy: number, public paddingValue: number, public isSyncFrame: boolean, - public degradationPriority: number) { + constructor( + public isLeading: number, + public dependsOn: number, + public isDependedOn: number, + public hasRedundancy: number, + public paddingValue: number, + public isSyncFrame: boolean, + public degradationPriority: number + ) { } } @@ -22,9 +28,9 @@ export class Sample { export class Trun extends Atom { public version: number; public flags: Uint8Array; - public trackId: number; - public dataOffset: number; - public samples: Sample[]; + public trackId: number; // fixme: not used, why? 
+ public dataOffset: number = 0; + public samples: Sample[] = []; public static parse(data: Uint8Array): Atom { const trun: Trun = new Trun(Atom.trun, data.byteLength); @@ -40,8 +46,7 @@ export class Trun extends Atom { let sampleCount: number = ByteParserUtils.parseUint32(data, 4); let offset: number = 8; - trun.samples = []; - let totalSize: number = 0; + let totalSizeOfSamples: number = 0; // for debug/test if (dataOffsetPresent) { trun.dataOffset = ByteParserUtils.parseUint32(data, offset); offset += 4; @@ -56,7 +61,7 @@ export class Trun extends Atom { } if (sampleSizePresent) { sample.size = ByteParserUtils.parseUint32(data, offset); - totalSize += sample.size; + totalSizeOfSamples += sample.size; offset += 4; } if (sampleCompositionTimeOffsetPresent) { @@ -74,7 +79,7 @@ export class Trun extends Atom { } if (sampleSizePresent) { sample.size = ByteParserUtils.parseUint32(data, offset); - totalSize += sample.size; + totalSizeOfSamples += sample.size; offset += 4; } if (sampleFlagsPresent) { @@ -90,7 +95,7 @@ export class Trun extends Atom { return trun; } - private static parseFlags(data: Uint8Array): SampleFlags { + static parseFlags(data: Uint8Array): SampleFlags { return new SampleFlags( (data[0] & 0x0c) >>> 2, (data[0] & 0x03), diff --git a/src/demuxer/mp4/mp4-demuxer.ts b/src/demuxer/mp4/mp4-demuxer.ts index d164203..7380f6f 100644 --- a/src/demuxer/mp4/mp4-demuxer.ts +++ b/src/demuxer/mp4/mp4-demuxer.ts @@ -1,59 +1,92 @@ import ByteParserUtils from '../../utils/byte-parser-utils'; + +import { IDemuxer, TracksHash } from '../demuxer'; + +import { Track, TrackType } from '../track'; +import { Mp4Track } from './mp4-track'; +import { Mp4SampleTable } from './mp4-sample-table'; + import { boxesParsers } from './atoms'; import { Atom, ContainerAtom } from './atoms/atom'; + import { Tfhd } from './atoms/tfhd'; -import { Track } from '../track'; -import { Mp4Track } from './mp4-track'; import { Tkhd } from './atoms/tkhd'; -import { IDemuxer } from '../demuxer'; -import { Frame } from '../frame'; +import { AvcC } from './atoms/avcC'; +import { Hev1 } from './atoms/hev1'; +import { Stts } from './atoms/stts'; +import { Stsc } from './atoms/stsc'; +import { Stsz } from './atoms/stsz'; +import { Ctts } from './atoms/ctts'; +import { Stss } from './atoms/stss'; +import { Stco } from './atoms/stco'; +import { Mdhd } from './atoms/mdhd'; +import { Esds } from './atoms/esds'; +import { Mvhd } from './atoms/mvhd'; +import { Tfdt } from './atoms/tfdt'; + +import { AudioAtom } from './atoms/helpers/audio-atom'; +import { VideoAtom } from './atoms/helpers/video-atom'; + +const log = (...msg: any[]) => void 0; // console.log.bind(console); +const warn = console.warn.bind(console); export class Mp4Demuxer implements IDemuxer { - public tracks: { [id: number] : Track; }; + public tracks: TracksHash = {}; + + private atoms: Atom[] = []; - private data: Uint8Array; - private atoms: Atom[]; + // track specific parsing stack private lastTrackId: number; private lastTrackDataOffset: number; + private lastAudioVideoAtom: AudioAtom | VideoAtom = null; + private lastCodecDataAtom: AvcC | Hev1 | Esds = null; + private lastSampleTable: Mp4SampleTable = null; + private lastTimescale: number = null; constructor() { - this.atoms = []; - this.tracks = {}; - - this.resetLastTrackInfos(); - } - - public getAtoms(): Atom[] { - return this.atoms; + this._resetLastTrackInfos(); } public append(data: Uint8Array): void { - this.atoms = this.parseAtoms(data); - this.updateTracks(); - } + this.atoms = 
this._parseAtoms(data); - public end(): void { - this.updateTracks(); + // digest any last sample-table + this._digestSampleTable(); } - private updateTracks(): void { + public end(): void {} + + public flush() { + this.atoms.length = 0; for (const trackId in this.tracks) { if (this.tracks.hasOwnProperty(trackId)) { - this.tracks[trackId].update(); + (this.tracks[trackId] as Mp4Track).flush(); } } } - private parseAtoms(data: Uint8Array, offset: number = 0): Atom[] { + public getAtoms(): Atom[] { + return this.atoms; + } + + private _parseAtoms(data: Uint8Array, offset: number = 0): Atom[] { const atoms: Atom[] = []; - this.data = data; - let dataOffset: number = offset; + let dataOffset: number = offset; while (dataOffset < data.byteLength) { - const size: number = ByteParserUtils.parseUint32(data, dataOffset); + let size: number = ByteParserUtils.parseUint32(data, dataOffset); const type: string = ByteParserUtils.parseIsoBoxType(data, dataOffset + 4); - const end: number = size > 1 ? dataOffset + size : data.byteLength; - const boxData: Uint8Array = data.subarray(dataOffset + 8, end); + + let boxDataOffset: number = dataOffset + 8; + if (size === 1) { + size = ByteParserUtils.parseUint64(data, boxDataOffset) + boxDataOffset = dataOffset + 16; + } else if (size === 0) { + size = data.byteLength; + } + + const end: number = dataOffset + size; + const boxData: Uint8Array = data.subarray(boxDataOffset, end); // parse let atom: Atom; @@ -71,83 +104,102 @@ export class Mp4Demuxer implements IDemuxer { } } + atoms.push(atom); + + this._processAtom(atom, dataOffset); + if (atom instanceof ContainerAtom) { - (atom as ContainerAtom).atoms = this.parseAtoms(boxData, (atom as ContainerAtom).containerDataOffset); + (atom as ContainerAtom).atoms = this._parseAtoms(boxData, (atom as ContainerAtom).containerDataOffset); } - atoms.push(atom); - this.processAtom(atom, dataOffset); + dataOffset = end; } + return atoms; } - private processAtom(atom: Atom, dataOffset: number): void { + private _processAtom(atom: Atom, dataOffset: number): void { + switch (atom.type) { // FIXME !!! `trex` box can contain super based set of default sample-duration/flags/size ... // (those are often repeated inside the tfhd when it is a fragmented file however, but still ... 
:) - // FIXME: much of this isn't going to work for plain old unfrag'd MP4 and MOV :) + case Atom.trak: + + this._digestSampleTable(); + this._resetLastTrackInfos(); case Atom.ftyp: case Atom.moov: case Atom.moof: + // (only) needed for fragmented mode this.lastTrackDataOffset = dataOffset; + + break; + + // Moov box / "initialization"-data and SIDX + + case Atom.sidx: + // FIXME: this isn't very nice + this._attemptCreateUnknownTrack(); + this._getLastTrackCreated().setSidxAtom(atom); break; case Atom.tkhd: this.lastTrackId = (atom as Tkhd).trackId; break; - case Atom.avcC: - if (this.lastTrackId > 0) { - this.tracks[this.lastTrackId] = new Mp4Track( - this.lastTrackId, - Track.TYPE_VIDEO, - Track.MIME_TYPE_AVC, - atom, - this.lastTrackDataOffset - ); - //this.resetLastTrackInfos(); - } + // Inside moov: Codec data -> create "known" tracks + + // stsd-boxed codec identifying atoms + + // AAC + case Atom.mp4a: + this.lastAudioVideoAtom = atom as AudioAtom | VideoAtom; + break; + + // H264 + case Atom.avc1: + this.lastAudioVideoAtom = atom as (AudioAtom | VideoAtom); + break; + + // H265 + case Atom.hev1: + this.lastAudioVideoAtom = atom as AudioAtom | VideoAtom; break; + // AVC/HEVC -> H264/5 case Atom.hvcC: - if (this.lastTrackId > 0) { - this.tracks[this.lastTrackId] = new Mp4Track( - this.lastTrackId, - Track.TYPE_VIDEO, - Track.MIME_TYPE_HEVC, - atom, - this.lastTrackDataOffset - ); - //this.resetLastTrackInfos(); - } + this.lastCodecDataAtom = atom as Hev1; + this._attemptCreateTrack(TrackType.VIDEO, Track.MIME_TYPE_HEVC, atom); break; - case Atom.mp4a: - if (this.lastTrackId > 0) { - this.tracks[this.lastTrackId] = new Mp4Track( - this.lastTrackId, - Track.TYPE_AUDIO, - Track.MIME_TYPE_AAC, - atom, - this.lastTrackDataOffset - ); - //this.resetLastTrackInfos(); - } + case Atom.avcC: + this.lastCodecDataAtom = atom as AvcC; + this._attemptCreateTrack(TrackType.VIDEO, Track.MIME_TYPE_AVC, atom); break; - case Atom.sidx: - this.ensureTrack(); - this.getCurrentTrack().setSidxAtom(atom); + case Atom.esds: + this._attemptCreateTrack(TrackType.AUDIO, Track.MIME_TYPE_AAC, atom); + this.lastCodecDataAtom = atom as Esds; + break; + + // Fragmented-mode ... 
+ + case Atom.tfdt: + const tfdt: Tfdt = atom as Tfdt; + this._getLastTrackCreated().setBaseMediaDecodeTime(tfdt.baseMediaDecodeTime); break; case Atom.tfhd: - this.ensureTrack(); - const tfhd: Tfhd = ( atom); - this.getCurrentTrack().setBaseDataOffset(tfhd.baseDataOffset); - this.getCurrentTrack().setDefaults({ + // FIXME: should be handled differently + // by looking at other things inside fragments + // and mapping eventually to previously parsed moov + this._attemptCreateUnknownTrack(); + const tfhd: Tfhd = atom as Tfhd; + this._getLastTrackCreated().setBaseDataOffset(tfhd.baseDataOffset); + this._getLastTrackCreated().addDefaults({ sampleDuration: tfhd.defaultSampleDuration, sampleFlags: tfhd.defaultSampleFlags, sampleSize: tfhd.defaultSampleSize @@ -155,39 +207,149 @@ export class Mp4Demuxer implements IDemuxer { break; case Atom.trun: - this.ensureTrack(); - this.getCurrentTrack().updateSampleDataOffset(this.lastTrackDataOffset); - this.getCurrentTrack().addTrunAtom(atom); + // FIXME: should be handled differently by looking + // at other things inside fragments and mapping eventually + // to previously parsed moov (see above for tfhds) + this._attemptCreateUnknownTrack(); + this._getLastTrackCreated().addTrunAtom(atom); + break; + + case Atom.mvhd: + this.lastTimescale = (atom as Mvhd).timescale; + break; + + // Plain-old MOV ie unfragmented mode ... + + case Atom.mdhd: + this.lastTimescale = (atom as Mdhd).timescale; + break; + + case Atom.stbl: + if (this.lastSampleTable !== null) { + throw new Error('Sample-table should not exist yet'); + } + break; + case Atom.stts: + this._haveSampleTable(); + this.lastSampleTable && (this.lastSampleTable.decodingTimestamps = atom as Stts); + break; + case Atom.stss: + this._haveSampleTable(); + this.lastSampleTable && (this.lastSampleTable.syncSamples = atom as Stss); + break; + case Atom.ctts: + this._haveSampleTable(); + this.lastSampleTable && (this.lastSampleTable.compositionTimestampOffsets = atom as Ctts); + break; + case Atom.stsc: + this._haveSampleTable(); + this.lastSampleTable && (this.lastSampleTable.samplesToChunkBox = atom as Stsc); + break; + case Atom.stsz: + this._haveSampleTable(); + this.lastSampleTable && (this.lastSampleTable.sampleSizes = atom as Stsz); + break; + case Atom.stco: + this._haveSampleTable(); + this.lastSampleTable && (this.lastSampleTable.chunkOffsetBox = atom as Stco); break; + + // payload data + case Atom.mdat: + // in plain old MOV the moov may be at the end of the file (and mdat before) + if (this._getLastTrackCreated()) { + log('updating sample-data offset:', this.lastTrackDataOffset) + this._getLastTrackCreated().updateInitialSampleDataOffset(this.lastTrackDataOffset); + log('processing current track-run'); + this._getLastTrackCreated().processTrunAtoms(); + } + break; + } + } + + private _haveSampleTable() { + if (this.lastSampleTable) { + return; + } + if (!this._getLastTrackCreated() || !this._getLastTrackCreated().isAv()) { + this._attemptCreateUnknownTrack(); + warn('not unpacking sample table for non-AV track'); + } else { // we only create a sample table representation for known track types + this.lastSampleTable = new Mp4SampleTable(this._getLastTrackCreated()); + } + + } + + + private _digestSampleTable() { + if (this.lastSampleTable) { + this.lastSampleTable.digest(); + this.lastSampleTable = null; } } + private _attemptCreateTrack(type: TrackType, mime: string, ref: Atom) { + if (!this.lastTrackId) { + throw new Error('No track-id set'); + } + + if (this.tracks[this.lastTrackId]) { + 
log('adding ref-atom to existing track with id:', this.lastTrackId, 'mime:', mime, 'type:', type); + (this.tracks[this.lastTrackId] as Mp4Track).addReferenceAtom(ref); + return; + } + + log('creating new track:', type, mime, 'id:', this.lastTrackId) + const track = new Mp4Track( + this.lastTrackId, + type, + mime, + [ref], + this.lastAudioVideoAtom, + this.lastTrackDataOffset + ); + if (this.lastTimescale !== null) { + log('setting parent timescale on track:', this.lastTimescale); + track.setTimescale(this.lastTimescale); + } + this.tracks[this.lastTrackId] = track; + + } + /** * Creates a track in case we haven't found a codec box */ - private ensureTrack(): void { + private _attemptCreateUnknownTrack(): void { if (!this.lastTrackId || !this.tracks[this.lastTrackId]) { - this.lastTrackId = 1; + warn('creating unknown-typed track'); + if (this.lastTrackId <= 0) { + this.lastTrackId = 1 + } this.tracks[this.lastTrackId] = new Mp4Track( this.lastTrackId, - Track.TYPE_UNKNOWN, + TrackType.UNKNOWN, Track.MIME_TYPE_UNKNOWN, null, - this.lastTrackDataOffset + null, + this.lastTrackDataOffset > 0 ? this.lastTrackDataOffset : 0 ); - //this.resetLastTrackInfos(); } } /** - * should be called everytime we create a track + * Should be called everytime we create a track */ - private resetLastTrackInfos() { - this.lastTrackId = 0; + private _resetLastTrackInfos() { + this.lastTrackId = -1; this.lastTrackDataOffset = -1; + this.lastSampleTable = null; + this.lastCodecDataAtom = null; + this.lastTimescale = null; + this.lastCodecDataAtom = null; + this.lastAudioVideoAtom = null; } - private getCurrentTrack(): Mp4Track { - return (this.tracks[this.lastTrackId] as Mp4Track); + private _getLastTrackCreated(): Mp4Track { + return (this.tracks[this.lastTrackId] as Mp4Track) || null; } } diff --git a/src/demuxer/mp4/mp4-sample-flags.ts b/src/demuxer/mp4/mp4-sample-flags.ts new file mode 100644 index 0000000..56cf580 --- /dev/null +++ b/src/demuxer/mp4/mp4-sample-flags.ts @@ -0,0 +1,21 @@ +export class Mp4SampleFlags { + public isLeading: number; + public dependsOn: number; + public isDependedOn: number; + public hasRedundancy: number; + public paddingValue: number; + public isNonSyncSample: number; + public degradationPriority: number; +} + +export function parseIsoBoxSampleFlags(flags: number): Mp4SampleFlags { + return { + isLeading: (flags[0] & 0x0c) >>> 2, + dependsOn: flags[0] & 0x03, + isDependedOn: (flags[1] & 0xc0) >>> 6, + hasRedundancy: (flags[1] & 0x30) >>> 4, + paddingValue: (flags[1] & 0x0e) >>> 1, + isNonSyncSample: flags[1] & 0x01, + degradationPriority: (flags[2] << 8) | flags[3] + }; +} diff --git a/src/demuxer/mp4/mp4-sample-table.ts b/src/demuxer/mp4/mp4-sample-table.ts new file mode 100644 index 0000000..b1ce54e --- /dev/null +++ b/src/demuxer/mp4/mp4-sample-table.ts @@ -0,0 +1,142 @@ +import { toMicroseconds } from '../../utils/timescale'; +import { FRAME_TYPE } from '../../codecs/h264/nal-units'; + +import { Frame } from '../frame'; + +import { Stts, TimeToSampleEntry } from './atoms/stts'; +import { Stsc, SampleToChunkEntry } from './atoms/stsc'; +import { Stsz } from './atoms/stsz'; +import { Ctts, CTimeOffsetToSampleEntry } from './atoms/ctts'; +import { Mp4Track } from './mp4-track'; +import { Stss } from './atoms/stss'; +import { Stco } from './atoms/stco'; + +export class Mp4SampleTable { + decodingTimestamps: Stts; + compositionTimestampOffsets: Ctts; + syncSamples: Stss; + sampleSizes: Stsz; + + chunkOffsetBox: Stco + samplesToChunkBox: Stsc; + + constructor(private _track: 
Mp4Track) { + if (!_track) { + throw new Error('Sample-table can not be created without a Track'); + } + } + + digest() { + + //debug('digesting sample table'); + + let dts = 0; + let frameCount = 0; + + const frames: Frame[] = []; + const chunksDecompressed: {samplesPerChunk: number, sampleDescriptionIndex: number}[] = [] + + function assertAndResetFrameCount() { + if (frameCount !== frames.length) { + throw new Error('Sample-to-chunk-list decompression yields inconsistent sample count. Input data may be corrupt.'); + } + + frameCount = 0; + } + + this.decodingTimestamps.timeToSamples.forEach((entry: TimeToSampleEntry) => { + + for (let i = 0; i < entry.sampleCount; i++) { + + const isSyncFrame = this.syncSamples ? + (this.syncSamples.syncSampleNumbers.indexOf(frameCount + 1) >= 0) : false; + + const newFrame = new Frame( + isSyncFrame ? FRAME_TYPE.I : FRAME_TYPE.P, + dts, + 0, // PTO/CTO table is optional, zero is default obviously. + entry.sampleDelta, + this.sampleSizes.sampleSize || this.sampleSizes.entries[frameCount] + ); + + frames.push(newFrame); + + frameCount++; // note: here we incr the count after using it as an ordinal index + + dts += entry.sampleDelta; + } + }); + + assertAndResetFrameCount(); + + // Having a CTO table is not mandatory + if (this.compositionTimestampOffsets) { + this.compositionTimestampOffsets.cTimeOffsetToSamples.forEach((entry: CTimeOffsetToSampleEntry) => { + for (let i = 0; i < entry.sampleCount; i++) { + + frames[frameCount] + .setPresentationTimeOffset(entry.sampleCTimeOffset); + + frameCount++; // note: here we incr the count after using it as an ordinal index + } + }); + + assertAndResetFrameCount(); + } + + this.samplesToChunkBox.sampleToChunks.forEach((sampleToChunkEntry: SampleToChunkEntry, index) => { + + // the sample-to-chunk box contains a compressed list + // of possibly repeating properties (samplesPerChunk + sampleDescriptionIndex) + // we need to decompress this information by looking at firstChunkIndex + + let chunksInThisEntry = 1; + + if (this.samplesToChunkBox.sampleToChunks.length === 1) { + chunksInThisEntry = frames.length / sampleToChunkEntry.samplesPerChunk; + } else if (index < this.samplesToChunkBox.sampleToChunks.length - 1) { + chunksInThisEntry = this.samplesToChunkBox.sampleToChunks[index + 1].firstChunk + - sampleToChunkEntry.firstChunk; + } else { // last chunk when not only chunk + chunksInThisEntry = (frames.length - frameCount) / sampleToChunkEntry.samplesPerChunk; + } + + for (let i=0; i < chunksInThisEntry; i++) { + chunksDecompressed.push(sampleToChunkEntry); + frameCount += sampleToChunkEntry.samplesPerChunk; + } + + }); + + assertAndResetFrameCount(); + + chunksDecompressed.forEach((chunkSampleInfo, index) => { + + let sampleOffsetInChunk = 0; + + for (let i = 0; i < chunkSampleInfo.samplesPerChunk; i++) { + + const frame = frames[frameCount]; + + const bytesOffset = this.chunkOffsetBox.chunkOffsets[index]; + + sampleOffsetInChunk; + + frame.setBytesOffset(bytesOffset) + + sampleOffsetInChunk += frame.size; + + frameCount++; + } + + }); + + assertAndResetFrameCount(); + + // Finally, append all frames to our track + frames.forEach((frame) => { + this._track.appendFrame(frame) + }); + + //log(frames) + } +}; diff --git a/src/demuxer/mp4/mp4-track.ts b/src/demuxer/mp4/mp4-track.ts index 86de769..1811ebe 100644 --- a/src/demuxer/mp4/mp4-track.ts +++ b/src/demuxer/mp4/mp4-track.ts @@ -1,10 +1,14 @@ -import { Track } from '../track'; -import { Atom } from './atoms/atom'; -import { Frame, MICROSECOND_TIMESCALE } from 
'../frame'; +import { FRAME_TYPE } from '../../codecs/h264/nal-units'; + +import { Track, TrackType } from '../track'; +import { Frame } from '../frame'; +import { Atom } from './atoms/atom'; +import { AudioAtom } from './atoms/helpers/audio-atom'; +import { VideoAtom } from './atoms/helpers/video-atom'; import { Sidx } from './atoms/sidx'; -import { Trun } from './atoms/trun'; -import { Tfhd } from './atoms/tfhd'; +import { Trun, SampleFlags } from './atoms/trun'; +import { Avc1 } from './atoms/avc1'; export type Mp4TrackDefaults = { sampleDuration: number; @@ -13,23 +17,84 @@ export type Mp4TrackDefaults = { } export class Mp4Track extends Track { - private sidx: Sidx = null; - private trunInfo: Trun[] = []; - private lastPts: number; - private timescale: number; - private defaults: Mp4TrackDefaults; + + private _frames: Frame[] = []; + private baseDataOffset: number = 0; + private baseMediaDecodeTime: number = 0; + + private endDts: number = 0; - constructor(id: number, type: string, mimeType: string, public referenceAtom: Atom, public dataOffset: number) { + private defaults: Mp4TrackDefaults[] = []; + private defaultSampleFlagsParsed: (SampleFlags | null)[] = []; + private sidx: Sidx = null; + private trunInfo: Trun[] = []; + private trunInfoReadIndex: number = 0; + + constructor( + id: number, + type: TrackType, + mimeType: string, + public referenceAtoms: Atom[], + public metadataAtom: AudioAtom | VideoAtom, + public dataOffset: number + ) { super(id, type, mimeType); - this.lastPts = 0; - this.duration = 0; if (this.dataOffset < 0) { throw new Error('Invalid file, no sample-data base-offset can be determined'); } } + get frames() { return this._frames; } + + /** + * post: endDts ie duration incremented by frame duration + * @param frame + * + */ + public appendFrame(frame: Frame) { + this.endDts += frame.duration; + this._frames.push(frame); + } + + public flush() { + + this.endDts = 0; + + this.trunInfo.length = 0; + this.trunInfoReadIndex = 0; + this.defaults.length = 0; + this.defaultSampleFlagsParsed.length = 0; + super.flush(); + } + + public getDuration() { + return this._frames.length ? + this.endDts - this._frames[0].dts : 0; + } + + public getDurationInSeconds() { + if (!this.hasTimescale()) { + throw new Error(`Track ${this.type} ${this.id} timescale is not present (has not been set or determined on parsing) to convert track-duration in seconds value!`); + } + return this.getDuration() / this.getTimescale(); + } + + public getTimescale(): number { + const timescale: number = this.sidx ? 
+ this.sidx.timescale : super.getTimescale(); + return timescale; + } + + public getResolution(): [number, number] { + if (this.type !== TrackType.VIDEO) { + throw new Error('Can not get resolution of non-video track'); + } + const avc1 = this.metadataAtom as Avc1; + return [avc1.width, avc1.height]; + } + public getSegmentIndex(): Sidx { return this.sidx; } @@ -38,20 +103,39 @@ export class Mp4Track extends Track { return this.trunInfo; } - public getReferenceAtom(): Atom { - return this.referenceAtom; + public getReferenceAtoms(): Atom[] { + return this.referenceAtoms; } - public getLastPts(): number { - return this.lastPts; + public addReferenceAtom(atom: Atom) { + this.referenceAtoms.push(atom); } - public getTimescale(): number { - return this.timescale; + public getMetadataAtom(): VideoAtom | AudioAtom { + return this.metadataAtom; + } + + public setBaseMediaDecodeTime(baseDts: number) { + this.baseMediaDecodeTime = baseDts; + this.endDts = baseDts; } - public setDefaults(defaults: Mp4TrackDefaults) { - this.defaults = defaults; + public getBaseMediaDecodeTime(): number { + return this.baseMediaDecodeTime; + } + + public addDefaults(defaults: Mp4TrackDefaults) { + this.defaults.push(defaults); + if (defaults.sampleFlags) { + this.defaultSampleFlagsParsed.push(Trun.parseFlags(new Uint8Array([ + defaults.sampleFlags & 0xff000000, + defaults.sampleFlags & 0x00ff0000, + defaults.sampleFlags & 0x0000ff00, + defaults.sampleFlags & 0x000000ff, + ]))); + } else { + this.defaultSampleFlagsParsed.push(null); + } } public getDefaults() { @@ -74,7 +158,7 @@ export class Mp4Track extends Track { * Not to be confused with the base offset which maybe be present for each track fragment inside the `tfhd`. * That value will be shared for each `trun`. */ - public updateSampleDataOffset(dataOffset: number) { + public updateInitialSampleDataOffset(dataOffset: number) { this.dataOffset = dataOffset; } @@ -85,55 +169,72 @@ export class Mp4Track extends Track { * Each trun box has it's own offset, which refers to this offset here in order to resolve the absolute position * of sample runs. */ - public getSampleDataOffset(): number { - return this.baseDataOffset + this.dataOffset; + public getFinalSampleDataOffset(): number { + return this.dataOffset + this.baseDataOffset; } public setSidxAtom(atom: Atom): void { this.sidx = atom as Sidx; - this.lastPts = 1000000 * this.sidx.earliestPresentationTime / this.sidx.timescale; - this.timescale = this.sidx.timescale; + this.endDts = this.sidx.earliestPresentationTime; + this.setTimescale(this.sidx.timescale); } + // TODO: move the truns array and processTrunAtoms to a own container class (like sample-table) public addTrunAtom(atom: Atom): void { const trun = atom as Trun; this.trunInfo.push(trun); + } - const timescale: number = this.sidx ? 
this.sidx.timescale : 1; + public processTrunAtoms() { + this.trunInfo.forEach((trun: Trun, trunIndex) => { - const sampleRunDataOffset: number = trun.dataOffset + this.getSampleDataOffset(); + if (trunIndex < this.trunInfoReadIndex) { + return; + } - let bytesOffset: number = sampleRunDataOffset; + const sampleRunDataOffset: number = trun.dataOffset + this.getFinalSampleDataOffset(); - for (const sample of trun.samples) { - const sampleDuration = sample.duration || this.defaults.sampleDuration; - if (!sampleDuration) { - throw new Error('Invalid file, samples have no duration'); - } + let bytesOffset: number = sampleRunDataOffset; - const duration: number = MICROSECOND_TIMESCALE * sampleDuration / timescale; + for (let i = 0; i < trun.samples.length; i++) { + const sample = trun.samples[i]; - this.lastPts += duration; - this.duration += duration; + const flags = sample.flags || this.defaultSampleFlagsParsed[trunIndex]; + if (!flags) { + //warn('no default sample flags in track sample-run'); + // in fact the trun box parser should provide a fallback instance of flags in this case + } - const flags = sample.flags || this.defaults.sampleFlags; - if (!flags) { - throw new Error('Invalid file, sample has no flags'); - } + const sampleDuration = sample.duration || this.defaults[trunIndex]?.sampleDuration; + if (!sampleDuration) { + throw new Error('Invalid file, samples have no duration'); + } - const cto: number = MICROSECOND_TIMESCALE * (sample.compositionTimeOffset || 0) / timescale; + const duration: number = sampleDuration; + const dts = this.endDts; + const cto: number = sample.compositionTimeOffset || 0; - this.frames.push(new Frame( - sample.flags.isSyncFrame ? Frame.IDR_FRAME : Frame.P_FRAME, - this.lastPts, - sample.size, - duration, - bytesOffset, - cto - )); + const frameSize = sample.size || this.defaults[trunIndex]?.sampleSize; + if (!frameSize) throw new Error('Frame has to have either sample-size of trun-entry or track default'); - bytesOffset += sample.size; - } + const frameType = flags ? (flags.isSyncFrame ? 
FRAME_TYPE.I : FRAME_TYPE.P) : FRAME_TYPE.NONE + + const newFrame = new Frame( + frameType, + dts, + cto, + duration, + frameSize, + bytesOffset + ); + + this.appendFrame(newFrame); + + bytesOffset += frameSize; + } + }); + + this.trunInfoReadIndex = this.trunInfo.length; } } diff --git a/src/demuxer/track.ts b/src/demuxer/track.ts index 00bc85f..799ab4d 100644 --- a/src/demuxer/track.ts +++ b/src/demuxer/track.ts @@ -1,17 +1,19 @@ -import { Frame, MICROSECOND_TIMESCALE } from './frame'; - -export class Track { - // FIXME: should be an enum type - public static TYPE_VIDEO: string = 'video'; - public static TYPE_AUDIO: string = 'audio'; - public static TYPE_TEXT: string = 'text'; - public static TYPE_COMPLEX: string = 'complex'; - public static TYPE_LOGO: string = 'logo'; - public static TYPE_BUTTONS: string = 'buttons'; - public static TYPE_CONTROL: string = 'control'; - public static TYPE_UNKNOWN: string = 'unknown'; - - // Here we don't need an enum +import { Frame } from './frame'; + +export enum TrackType { + VIDEO, + AUDIO, + TEXT, + COMPLEX, + LOGO, + BUTTONS, + CONTROL, + METADATA, + UNKNOWN +} + +export abstract class Track { + public static MIME_TYPE_AAC: string = 'audio/mp4a-latm'; public static MIME_TYPE_AVC: string = 'video/avc'; public static MIME_TYPE_HEVC: string = 'video/hevc'; @@ -21,34 +23,47 @@ export class Track { public static MIME_TYPE_ID3: string = 'application/id3'; public static MIME_TYPE_UNKNOWN: string = 'unknown'; - protected frames: Frame[] = []; - protected duration: number = NaN; + private _timeScale: number = NaN; + + constructor(public id: number, + public type: TrackType, + public mimeType: string) {} - constructor(public id: number, public type: string /* fixme: make enum type */, public mimeType: string) { - this.frames = []; + abstract readonly frames: Frame[]; + + public isAv() { + return this.type === TrackType.AUDIO || this.type === TrackType.VIDEO; + } + + public flush() { + this.frames.length = 0; } public getFrames(): Frame[] { return this.frames; } - public getDuration(): number { - return this.duration; + public hasTimescale() { + return Number.isFinite(this.getTimescale()); } - public getDurationInSeconds(): number { - return this.getDuration() / MICROSECOND_TIMESCALE; + public getTimescale() { + return this._timeScale; } - public getMetadata(): {} { // FIXME: Make this a string-to-any hash - return {}; + public setTimescale(timeScale: number) { + if (timeScale <= 0 || !Number.isSafeInteger(timeScale)) { + throw new Error(`Track timescale has to be strictly positive safe-integer value`); + } + this._timeScale = timeScale; } - public update(): void { - this.frames = this.getFrames().sort((a: Frame, b: Frame): number => { - return a.timeUs - b.timeUs; - }); - - this.duration = this.getDuration(); + /** + * @deprecated + */ + public getMetadata() { + return {} } + + public abstract getResolution(): [number, number]; } diff --git a/src/demuxer/ts/mpegts-demuxer.ts b/src/demuxer/ts/mpegts-demuxer.ts index a0fe156..6a603de 100644 --- a/src/demuxer/ts/mpegts-demuxer.ts +++ b/src/demuxer/ts/mpegts-demuxer.ts @@ -1,239 +1,318 @@ -import { BitReader } from '../../utils/bit-reader'; -import { PESReader } from './pes-reader'; -import { TSTrack } from './ts-track'; -import { Track } from '../track'; import { IDemuxer } from '../demuxer'; +import { Track, TrackType } from '../track'; + +import { MptsElementaryStreamType, PESReader } from './pes-reader'; +import { MpegTSTrack } from './ts-track'; +import { BitReader } from '../../utils/bit-reader'; +import { 
parsePsiPacketHeader } from './psi-header'; -enum CONTAINER_TYPE { - UNKNOWN = 1, +enum MpegContainerType { + UNKNOWN, MPEG_TS, RAW_AAC, + RAW_MPEG_AUDIO } +const ENABLE_WRAP_OVER_CLOCK_32BIT = true; + +export type MpegTSTracksHash = { [id: number] : MpegTSTrack; } + export class MpegTSDemuxer implements IDemuxer { + private static MPEGTS_SYNC: number = 0x47; - private static MPEGTS_PACKET_SIZE: number = 187; - - public tracks: { [id: number] : Track; }; - - private data: Uint8Array; - private dataOffset: number; - private containerType: number; - private pmtParsed: boolean; - private packetsCount: number; - private pmtId: number; - - constructor () { - this.containerType = CONTAINER_TYPE.UNKNOWN; - this.pmtParsed = false; - this.packetsCount = 0; - this.pmtId = -1; - this.tracks = {}; + private static MPEGTS_PACKET_SIZE: number = 188; + + private _tracks: MpegTSTracksHash = {}; + + private _containerType: MpegContainerType = MpegContainerType.UNKNOWN; + + private _data: Uint8Array; + private _dataOffset: number; + + private _packetsCount: number = 0; + + private _programMapPid: number = NaN; + private _pmtParsed: boolean = false; + + /** + * Either mutation of this property only applies to any track created *after*. + */ + public enableWrapOver32BitClock: boolean = ENABLE_WRAP_OVER_CLOCK_32BIT; + + get currentBufferSize(): number { + return this._data?.byteLength || 0; + } + + get currentPacketCount(): number { + return this._packetsCount; + } + + get isProgramMapUpdated(): boolean { + return this._pmtParsed; + } + + get tracks(): MpegTSTracksHash { + return { ... this._tracks }; + } + + get numberOfTracks(): number { + return Object.keys(this._tracks).length; } - public append(data: Uint8Array): void { - if (!this.data || this.data.byteLength === 0 || this.dataOffset >= this.data.byteLength) { - this.data = data; - this.dataOffset = 0; + public append(data: Uint8Array, pruneAfterParse: boolean = false): Uint8Array | null { + if (!this._data || this._data.byteLength === 0) { + this._data = new Uint8Array(data); + this._dataOffset = 0; } else { - const newLen: number = this.data.byteLength + data.byteLength; - const temp: Uint8Array = new Uint8Array(newLen); - temp.set(this.data, 0); - temp.set(data, this.data.byteLength); - this.data = temp; + const newLen: number = this._data.byteLength + data.byteLength; + const newBuffer: Uint8Array = new Uint8Array(newLen); + newBuffer.set(this._data, 0); + newBuffer.set(data, this._data.byteLength); + this._data = newBuffer; } - this.parse(); + this._parse(); - if (this.dataOffset > 0) { - this.data = this.data.subarray(this.dataOffset); - this.dataOffset = 0; + if (pruneAfterParse) { + return this.prune(); } - - this.updateTracks(); + return null; } - public end(): void { - for (const trackId in this.tracks) { - if (this.tracks.hasOwnProperty(trackId)) { - (this.tracks[trackId] as TSTrack).pes.flush(); - this.tracks[trackId].update(); + public prune(): Uint8Array | null { + let parsedBuf: Uint8Array = null; + // prune off parsing remainder from buffer + if (this._dataOffset > 0) { + // we might have dropped the data already + // through a parsing callback calling end() for example. 
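+            // (if the data was already dropped, there is nothing left to slice off
+            // and only the read offset gets reset below)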
+ if (this._data) { + // the offset is expected to go +1 the buffer range + // thus the > instead of >= + if (this._dataOffset > this._data.byteLength) { + throw new Error('Reader offset is out of buffer range'); + } + // second arg of .subarray is exclusive range + parsedBuf = this._data.subarray(0, this._dataOffset); + // the first argument yields to an empty array when out-of-range + this._data = this._data.subarray(this._dataOffset); } + this._dataOffset = 0; } - this.data = null; + return parsedBuf; } - private parse(): void { - this.findContainerType(); + public end(): void { + this._data = null; + this._dataOffset = 0; + } + + public onProgramMapUpdate() {}; + + private _parse(): void { - if (this.containerType === CONTAINER_TYPE.MPEG_TS) { - this.readHeader(); - this.readSamples(); + this._findContainerType(); + + if (this._containerType === MpegContainerType.MPEG_TS) { + this._parseTsPackets(); } else { - const dataParser: BitReader = new BitReader(this.data); - this.tracks[0] = new TSTrack(0, Track.TYPE_AUDIO, Track.MIME_TYPE_AAC, - new PESReader(0, PESReader.TS_STREAM_TYPE_AAC)); - (this.tracks[0] as TSTrack).pes.appendData(false, dataParser); + this._parseRawEsPackets(); } } - private updateTracks(): void { - for (const trackId in this.tracks) { - if (this.tracks.hasOwnProperty(trackId)) { - this.tracks[trackId].update(); - } - } + private _parseRawEsPackets() { + const streamReader: BitReader = new BitReader(this._data); + this._tracks[0] = new MpegTSTrack(0, + TrackType.AUDIO, Track.MIME_TYPE_AAC, + new PESReader(0, MptsElementaryStreamType.TS_STREAM_TYPE_AAC, false)); + this._tracks[0].pes.appendPacket(false, streamReader); } - private resetTracks(): void { - for (let id in this.tracks) { - if (this.tracks.hasOwnProperty(id)) { - (this.tracks[id] as TSTrack).pes.reset(); - } - } - } + private _findContainerType(): void { - private findContainerType(): void { - while (this.dataOffset < this.data.byteLength) { - if (this.data[this.dataOffset] === MpegTSDemuxer.MPEGTS_SYNC) { - this.containerType = CONTAINER_TYPE.MPEG_TS; + if (this._containerType !== MpegContainerType.UNKNOWN) return; + + while (this._dataOffset < this._data.byteLength) { + if (this._data[this._dataOffset] === MpegTSDemuxer.MPEGTS_SYNC) { + this._containerType = MpegContainerType.MPEG_TS; break; - } else if ((this.data.byteLength - this.dataOffset) >= 4) { - const dataRead: number = (this.data[this.dataOffset] << 8) | (this.data[this.dataOffset + 1]); + } + + // Q: Makes sense supporting this here, or better in specific demux impl? + // Doing that in this way relates mostly to handling transparently + // HLS segments probably. But generally doesnt really give any gain + // to support plain ADTS/AAC as a container in an MPEG-TS demuxer? + // Format detection should be done on the payload before creating + // any specific demuxer for it? + // For sure, this current code-path may rather spawn unexpected side-effects, + // while being an extreme corner-case on usage side. 
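+            // The previous raw ADTS/ID3 sniffing is therefore left disabled below,
+            // kept only as commented-out reference: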
+ /* + else if ((this._data.byteLength - this._dataOffset) >= 4) { + const dataRead: number = (this._data[this._dataOffset] << 8) | (this._data[this._dataOffset + 1]); if (dataRead === 0x4944 || (dataRead & 0xfff6) === 0xfff0) { - this.containerType = CONTAINER_TYPE.RAW_AAC; + this._containerType = MpegContainerType.RAW_AAC; break; } } - this.dataOffset++; + */ + + this._dataOffset++; } - if (this.containerType === CONTAINER_TYPE.UNKNOWN) { - throw new Error('Format not supported'); + if (this._containerType === MpegContainerType.UNKNOWN) { + throw new Error('Transport stream packets format not recognized'); } } - private readHeader(): void { - while (this.dataOffset < this.data.byteLength - 1) { - const byteRead: number = this.data[this.dataOffset]; - this.dataOffset++; - - if (byteRead === MpegTSDemuxer.MPEGTS_SYNC - && (this.data.byteLength - this.dataOffset) >= MpegTSDemuxer.MPEGTS_PACKET_SIZE) { - - const packet: Uint8Array = this.data.subarray(this.dataOffset, - this.dataOffset + MpegTSDemuxer.MPEGTS_PACKET_SIZE); - this.dataOffset += MpegTSDemuxer.MPEGTS_PACKET_SIZE; + private _parseTsPackets(): void { + // run as long as there is at least a full packet in buffer + while ((this._data.byteLength - this._dataOffset) >= MpegTSDemuxer.MPEGTS_PACKET_SIZE) { - this.processTSPacket(packet); - - if (this.pmtParsed) { - break; - } + // check for sync-byte + const currentByte: number = this._data[this._dataOffset]; + if (currentByte !== MpegTSDemuxer.MPEGTS_SYNC) { + // keep looking if we are out of sync + this._dataOffset++; + continue; } + const packet: Uint8Array = this._data.subarray(this._dataOffset + 1, + this._dataOffset + MpegTSDemuxer.MPEGTS_PACKET_SIZE); + this._dataOffset += MpegTSDemuxer.MPEGTS_PACKET_SIZE; + this._readTsPacket(packet); } } - private readSamples(): void { - while (this.dataOffset < this.data.byteLength - 1) { - const byteRead: number = this.data[this.dataOffset++]; + private _readTsPacket(packet: Uint8Array): void { - if (byteRead === MpegTSDemuxer.MPEGTS_SYNC - && (this.data.byteLength - this.dataOffset) >= MpegTSDemuxer.MPEGTS_PACKET_SIZE) { + this._packetsCount++; - const packet: Uint8Array = this.data.subarray(this.dataOffset, this.dataOffset - + MpegTSDemuxer.MPEGTS_PACKET_SIZE); - this.dataOffset += MpegTSDemuxer.MPEGTS_PACKET_SIZE; + const packetReader: BitReader = new BitReader(packet); + packetReader.skipBits(1); - this.processTSPacket(packet); - } - } - } + const payloadUnitStartIndicator: boolean = (packetReader.readBits(1) !== 0); + packetReader.skipBits(1); - private processTSPacket(packet: Uint8Array): void { - this.packetsCount++; + const pid: number = packetReader.readBits(13); - let packetParser: BitReader = new BitReader(packet); - packetParser.skipBits(1); + // use unsigned right shift? 
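+        // adaptation_field_control (2 bits): 01 = payload only, 10 = adaptation field only, 11 = both present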
+ const adaptationField: number = (packetReader.readByte() & 0x30) >>> 4; - const payloadUnitStartIndicator: boolean = (packetParser.readBits(1) !== 0); - packetParser.skipBits(1); + // todo: read continuity counter - const pid: number = packetParser.readBits(13); - const adaptationField: number = (packetParser.readByte() & 0x30) >> 4; + // Adaptation field present if (adaptationField > 1) { - const length: number = packetParser.readByte(); + // adaptation field len + const length: number = packetReader.readByte(); if (length > 0) { - packetParser.skipBytes(length); + packetReader.skipBytes(length); } } + + // Payload data present if (adaptationField === 1 || adaptationField === 3) { if (pid === 0) { - this.parseProgramId(payloadUnitStartIndicator, packetParser); - } else if (pid === this.pmtId) { - this.parseProgramTable(payloadUnitStartIndicator, packetParser); + this._parseProgramAllocationTable(payloadUnitStartIndicator, packetReader); + } else if (pid === this._programMapPid) { + this._parseProgramMapTable(payloadUnitStartIndicator, packetReader); } else { - const track: TSTrack = this.tracks[pid] as TSTrack; + const track: MpegTSTrack = this._tracks[pid]; + // handle case where PID not found? if (track && track.pes) { - track.pes.appendData(payloadUnitStartIndicator, packetParser); + track.pes.appendPacket(payloadUnitStartIndicator, packetReader); } } } } - private parseProgramId(payloadUnitStartIndicator: boolean, packetParser: BitReader): void { - if (payloadUnitStartIndicator) { - packetParser.skipBytes(packetParser.readByte()); + private _parseProgramAllocationTable(payloadUnitStartIndicator: boolean, packetParser: BitReader): void { + const tableDataLength = parsePsiPacketHeader(packetParser, payloadUnitStartIndicator); + + let bytesRemaining = tableDataLength; + while (bytesRemaining > 0) { + + /** + * Program num 16 Relates to the Table ID extension in the associated PMT. A value of 0 is reserved for a NIT packet identifier. + Reserved bits 3 Set to 0x07 (all bits on) + Program map PID 13 The packet identifier that contains the associated PMT + */ + const programNum = packetParser.readBits(16); + packetParser.readBits(3); // skip reserved bits + const pid = packetParser.readBits(13); + // other progam-numbers than 0 specifiy PMT, we expect to find an entry for this in each PAT. + // optional entry with 1 is reserved for NIT (network info), where other PSI table types are carried, e.g SIT. + if (programNum !== 0) { + this._programMapPid = pid; + } + bytesRemaining -= 4; } - packetParser.skipBits(27 + 7 * 8); - this.pmtId = packetParser.readBits(13); } - private parseProgramTable(payloadUnitStartIndicator: boolean, packetParser: BitReader): void { - if (payloadUnitStartIndicator) { - packetParser.skipBytes(packetParser.readByte()); - } + private _parseProgramMapTable(payloadUnitStartIndicator: boolean, packetParser: BitReader): void { + const tableDataLength = parsePsiPacketHeader(packetParser, payloadUnitStartIndicator); - packetParser.skipBits(12); - const sectionLength: number = packetParser.readBits(12); - packetParser.skipBits(4 + 7 * 8); - const programInfoLength: number = packetParser.readBits(12); + /** + * Reserved bits 3 Set to 0x07 (all bits on) + PCR PID 13 If this is unused. then it is set to 0x1FFF (all bits on). 
+ Reserved bits 4 Set to 0x0F (all bits on) + Program info length unused bits 2 Set to 0 (all bits off) + */ + + packetParser.skipBits(3); // reserved bits 0x07 + const pcrPid = packetParser.readBits(13); // PCR PID + packetParser.skipBits(4); // reserved bits 0x0F + packetParser.skipBits(2); // Program info length unused bits + + const programInfoLength: number = packetParser.readBits(10); packetParser.skipBytes(programInfoLength); - let bytesRemaining: number = sectionLength - 9 - programInfoLength - 4; + let bytesRemaining: number = tableDataLength - programInfoLength - 4; // 4 bytes from PMT header fields above while (bytesRemaining > 0) { + const streamType: number = packetParser.readBits(8); packetParser.skipBits(3); const elementaryPid: number = packetParser.readBits(13); packetParser.skipBits(4); const infoLength: number = packetParser.readBits(12); packetParser.skipBytes(infoLength); - bytesRemaining -= infoLength + 5; - if (!this.tracks[elementaryPid]) { - const pes: PESReader = new PESReader(elementaryPid, streamType); - let type: string; + + bytesRemaining -= infoLength + 5; // 5 bytes fields parsed above + + if (!this._tracks[elementaryPid]) { + + const pesReader: PESReader = new PESReader(elementaryPid, streamType, this.enableWrapOver32BitClock); + + let type: TrackType; let mimeType: string; - if (streamType === PESReader.TS_STREAM_TYPE_AAC) { - type = Track.TYPE_AUDIO; + if (streamType === MptsElementaryStreamType.TS_STREAM_TYPE_AAC) { + type = TrackType.AUDIO; mimeType = Track.MIME_TYPE_AAC; - } else if (streamType === PESReader.TS_STREAM_TYPE_H264) { - type = Track.TYPE_VIDEO; + } else if (streamType === MptsElementaryStreamType.TS_STREAM_TYPE_H264) { + type = TrackType.VIDEO; mimeType = Track.MIME_TYPE_AVC; - } else if (streamType === PESReader.TS_STREAM_TYPE_ID3) { - type = Track.TYPE_TEXT; + } else if (streamType === MptsElementaryStreamType.TS_STREAM_TYPE_ID3) { + type = TrackType.TEXT; mimeType = Track.MIME_TYPE_ID3; - } else if (streamType === PESReader.TS_STREAM_TYPE_MPA || streamType === PESReader.TS_STREAM_TYPE_MPA_LSF) { - type = Track.TYPE_AUDIO; + } else if (streamType === MptsElementaryStreamType.TS_STREAM_TYPE_MPA + || streamType === MptsElementaryStreamType.TS_STREAM_TYPE_MPA_LSF) { + type = TrackType.AUDIO; mimeType = Track.MIME_TYPE_MPEG; - } else if (streamType === PESReader.TS_STREAM_TYPE_METADATA) { - // do nothing + } else if (streamType === MptsElementaryStreamType.TS_STREAM_TYPE_METADATA) { + + // todo: add support reading custom metadata + type = TrackType.METADATA; } else { - type = Track.TYPE_UNKNOWN; + type = TrackType.UNKNOWN; mimeType = Track.MIME_TYPE_UNKNOWN; } - this.tracks[elementaryPid] = new TSTrack(elementaryPid, type, mimeType, pes); + this._tracks[elementaryPid] = new MpegTSTrack(elementaryPid, type, mimeType, pesReader); } } - this.pmtParsed = true; + + this._pmtParsed = true; + + if(this.numberOfTracks === 0) { + throw new Error('Parsed new PMT but have zero tracks') + } + + this.onProgramMapUpdate(); } } diff --git a/src/demuxer/ts/payload/adts-consts.ts b/src/demuxer/ts/payload/adts-consts.ts new file mode 100644 index 0000000..1caa2d6 --- /dev/null +++ b/src/demuxer/ts/payload/adts-consts.ts @@ -0,0 +1,20 @@ +export const ADTS_HEADER_LEN = 7 as const; +export const ADTS_CRC_SIZE: number = 2 as const; + +export const ADTS_SAMPLE_RATES: number[] = [ + 96000, + 88200, + 64000, + 48000, + 44100, + 32000, + 24000, + 22050, + 16000, + 12000, + 11025, + 8000, + 7350 +]; + +export const AAC_FRAME_SAMPLES_NUM = 1024; diff --git 
a/src/demuxer/ts/payload/adts-reader.ts b/src/demuxer/ts/payload/adts-reader.ts index ccfd4f5..377e4d4 100644 --- a/src/demuxer/ts/payload/adts-reader.ts +++ b/src/demuxer/ts/payload/adts-reader.ts @@ -1,125 +1,292 @@ -import { BitReader } from '../../../utils/bit-reader'; import { PayloadReader } from './payload-reader'; -import { Frame } from '../../frame'; import { Track } from '../../track'; +import { Frame } from '../../frame'; + +import { BitReader } from '../../../utils/bit-reader'; +import { MPEG_CLOCK_HZ } from '../../../utils/timescale'; + +import { + AAC_FRAME_SAMPLES_NUM, + ADTS_CRC_SIZE, + ADTS_HEADER_LEN, + ADTS_SAMPLE_RATES +} from './adts-consts'; -enum State { - FIND_SYNC = 1, - READ_HEADER = 2, - READ_FRAME = 3 +import { FRAME_TYPE } from '../../../codecs/h264/nal-units'; + +enum AdtsReaderState { + FIND_SYNC, + READ_HEADER, + READ_FRAME +} + +const ADTS_HEADER_LEN_WITH_CRC = ADTS_HEADER_LEN + ADTS_CRC_SIZE; + +// some popular encoders set this to MPEG2 still, even though sending AAC. +// performing the assertion (that expects MP4A compliant i.e unset), +// will then lead to an error and failure to parse every frame then. +// by default we skip this assertion to be more tolerant of either encoders, +// and assume payload advertisement is done in PMT either way and content +// validated/detected to be AAC downstream further anyhow. +// Therefore: +// Only enable this, if you know you need the parser to fail on input data where the bit is set wrongly. +const ADTS_HEADER_ASSERT_MPEG_VERSION = false; + +export interface AdtsFrameInfo { + aacObjectType: number; + channels: number; + sampleRate: number; + headerLen: number; + accessUnitSize: number; + numFrames: number } export class AdtsReader extends PayloadReader { - private static ADTS_HEADER_SIZE: number = 5; - private static ADTS_SYNC_SIZE: number = 2; - private static ADTS_CRC_SIZE: number = 2; - - private static ADTS_SAMPLE_RATES: number[] = [96000, 88200, 64000, 48000, - 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350]; - - public channels: number; - public sampleRate: number; - public frameDuration: number; - public currentFrameSize: number; - - private state: State; - - constructor () { - super(); - this.channels = 0; - this.sampleRate = 0; - this.frameDuration = 0; - this.currentFrameSize = 0; - this.state = State.FIND_SYNC; - this.dataOffset = 0; + + private _state: AdtsReaderState = AdtsReaderState.FIND_SYNC; + + private _currentFrame: AdtsFrameInfo | null = null; + + private _frameDtsOffset: number = 0; + + get currentFrameInfo(): AdtsFrameInfo | null { + return this._currentFrame; + } + + get currentSampleRate(): number { + return this.currentFrameInfo?.sampleRate; } public getMimeType(): string { return Track.MIME_TYPE_AAC; } - public consumeData(pts: number): void { + public read(dts: number): void { + // it is expected after this check a dataBuffer exists if (!this.dataBuffer) { return; - } - if (pts >= 0) { - this.timeUs = pts; + //throw new Error('read() should not be called without priorly data appended'); } - if (this.firstTimestamp === -1) { - this.firstTimestamp = this.timeUs; - } + let needMoreData = false; + while (!needMoreData && this.dataOffset < this.dataBuffer.byteLength) { + + // only update our clock upon next sync-word, + // as we may get fed PUSI packets + // with partially read prior timed frames in buffer. 
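+            // _frameDtsOffset accumulates the samples of frames parsed since the last
+            // PES timestamp; e.g. at 48 kHz a single-RDB ADTS frame advances it by
+            // 1024 samples (~21.3 ms), keeping each frame DTS sample-accurate.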
+ if (this._state === AdtsReaderState.FIND_SYNC) { + // reset offset when PES timing updated + if (this.dts !== dts) { + this._frameDtsOffset = 0; + } + this.setCurrentTime(dts, 0); + } + + switch (this._state) { + case AdtsReaderState.FIND_SYNC: + // true when sync when + if (this._findNextSync()) { + this._state = AdtsReaderState.READ_HEADER; + } else { + needMoreData = true; + } + break; - while (this.dataOffset < this.dataBuffer.byteLength) { - if (this.state === State.FIND_SYNC) { - this.findNextSync(); - } else if (this.state === State.READ_HEADER) { - if (this.dataBuffer.byteLength - this.dataOffset < (AdtsReader.ADTS_HEADER_SIZE + - AdtsReader.ADTS_SYNC_SIZE)) { + case AdtsReaderState.READ_HEADER: + if (this.dataBuffer.byteLength - this.dataOffset + < ADTS_HEADER_LEN_WITH_CRC) { // assume longest possible header + needMoreData = true; break; } - this.parseAACHeader(); - } else if (this.state === State.READ_FRAME) { - if ((this.dataBuffer.byteLength - this.dataOffset) < (AdtsReader.ADTS_SYNC_SIZE + - AdtsReader.ADTS_HEADER_SIZE + this.currentFrameSize)) { + try { + this._parseHeader(); + } catch (err) { + // data pointers will be nulled by reset call, so we need to make err string first + const errMsg = `Error parsing header at ${this.dataOffset}/${this.dataBuffer.byteLength} [B]; t=${JSON.stringify(this.getCurrentTime())} [s]; \nException: ${(err as Error).message}`; + + this.reset(); + this._state = AdtsReaderState.FIND_SYNC; + throw new Error(errMsg); + } + this._state = AdtsReaderState.READ_FRAME; + break; + + case AdtsReaderState.READ_FRAME: + const { + headerLen, + accessUnitSize, + sampleRate, + numFrames // AAC frames in ADTS payload + } = this._currentFrame; + + if (this.dataBuffer.byteLength - this.dataOffset + < headerLen + accessUnitSize) { + needMoreData = true; break; } - this.frames.push(new Frame(Frame.IDR_FRAME, this.timeUs, this.currentFrameSize)); - this.timeUs = this.timeUs + this.frameDuration; - this.dataOffset += (AdtsReader.ADTS_SYNC_SIZE + AdtsReader.ADTS_HEADER_SIZE + - this.currentFrameSize); - this.state = State.FIND_SYNC; + let frameDtsAudioRate + = Math.round(sampleRate * this.dts / MPEG_CLOCK_HZ); + frameDtsAudioRate += this._frameDtsOffset; + + // effective frame duration results in amount of AAC frames + // contained by the current ADTS header (up to 4). + const frameDuration = numFrames * AAC_FRAME_SAMPLES_NUM; + this._frameDtsOffset += frameDuration; + + // actually using sample-rate accurate timebase + this.frames.push(new Frame( + FRAME_TYPE.NONE, + frameDtsAudioRate, + 0, // CTO actually always 0 with AAC + frameDuration, + accessUnitSize, + this.dataOffset + )); + + const frameDataStart = this.dataOffset + headerLen; + const frameDataEnd = frameDataStart + accessUnitSize; + const frameData = this.dataBuffer.subarray(frameDataStart, frameDataEnd); + + this.dataOffset = frameDataEnd; + + // note: intentionally setting state before invoke external callback + // that may have any unspecified side-effects or recursion. 
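+                    // (a re-entrant read() triggered from within onData() thus starts
+                    // again from a clean sync-word search)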
+ this._state = AdtsReaderState.FIND_SYNC; + + this.onData(frameData, frameDtsAudioRate, 0); + break; } } + // prune buffer to remaining data this.dataBuffer = this.dataBuffer.subarray(this.dataOffset); this.dataOffset = 0; } - private findNextSync(): void { - const limit: number = this.dataBuffer.byteLength - 1; - for (let i: number = this.dataOffset; i < limit; i++) { - const dataRead: number = (((this.dataBuffer[i]) << 8) | (this.dataBuffer[i + 1])); - if ((dataRead & 0xfff6) === 0xfff0) { + /** + * + * @returns + * - true when found (post: state = READ_HEADER) + * - false when more data needed (post: dataOffset = first byte after inclusive end of scan window) + */ + private _findNextSync(): boolean { + + // sync-word spans 2 bytes (12 bits) + if (this.dataBuffer.byteLength - this.dataOffset <= 1) return false; + + // nextDataOffset should be be > 1, ensured by above check + const nextDataOffset: number = this.dataBuffer.byteLength - 1; + + // we iterate until the second-last byte only, as we need to access i+1. + // cond is false if buffer byteLength = 1 also (guarded from that case early + // as it is expected further below nextDataOffset >= 0). + for (let i: number = this.dataOffset; i < nextDataOffset; i++) { + + const wordData: number = ((this.dataBuffer[i]) << 8) | (this.dataBuffer[i + 1]); + + /** + * A 12 syncword 0xFFF, all bits must be 1 + * B 1 MPEG Version: 0 for MPEG-4, 1 for MPEG-2 + * C 2 Layer: always 0 + * D 1 protection absent, Warning, set to 1 if there is no CRC and 0 if there is CRC + */ + // 0b6 = 0110 mask to ignore the mpeg-version and CRC bit, + // and assert check for layer bits to be zero + // (additional assertion with regard to broken packet data or misc parser errors). + if ((wordData & 0xfff6) === 0xfff0) { this.dataOffset = i; - if (this.dataOffset < this.dataBuffer.byteLength) { - this.state = State.READ_HEADER; - } - return; + return true; } + + // handle/notify lost sync ? (should only happen on broken packet) } - this.dataOffset = this.dataBuffer.byteLength; + // start further read at current second-last + // attention: this assignment assumes nextDataOffset was computed + // via a non-zero byteLength of read buffer, which we assert at top. + this.dataOffset = nextDataOffset; + return false; } - private parseAACHeader(): void { - const aacHeaderParser: BitReader = new BitReader(this.dataBuffer.subarray(this.dataOffset, - this.dataOffset + AdtsReader.ADTS_SYNC_SIZE + AdtsReader.ADTS_HEADER_SIZE)); - - aacHeaderParser.skipBits(15); - const hasCrc: boolean = !aacHeaderParser.readBool(); - aacHeaderParser.skipBits(2); - const sampleRateIndex: number = aacHeaderParser.readBits(4); - if (sampleRateIndex < AdtsReader.ADTS_SAMPLE_RATES.length) { - this.sampleRate = AdtsReader.ADTS_SAMPLE_RATES[sampleRateIndex]; - } else { - this.sampleRate = sampleRateIndex; + private _parseHeader(): void { + + // first, clear current frame state. in case of exception during header parse, + // it will keep null state, as we only set the frame after success. + this._currentFrame = null; + + const reader: BitReader = new BitReader( + this.dataBuffer.subarray(this.dataOffset, this.dataBuffer.byteLength)); + + // skip sync-word + reader.skipBits(12); + + const mpegVersion: number = reader.readBool() ? 
1 : 0; // MPEG Version: 0 for MPEG-4, 1 for MPEG-2 + if (ADTS_HEADER_ASSERT_MPEG_VERSION && mpegVersion !== 0) { + throw new Error(`Expected in header-data MPEG-version flag = 0 (only MP4-audio supported), but signals MPEG-2!`); + } + + reader.skipBits(2); + + const hasCrc: boolean = ! reader.readBool(); + const headerLen = hasCrc ? ADTS_HEADER_LEN_WITH_CRC : ADTS_HEADER_LEN; + + /** + * 1: AAC Main + * 2: AAC LC (Low Complexity) + * 3: AAC SSR (Scalable Sample Rate) + * 4: AAC LTP (Long Term Prediction) + */ + // profile, the MPEG-4 Audio Object Type minus 1 + const aacObjectType = reader.readBits(2) + 1; // bits value range 0-3 + if (aacObjectType <= 0 || aacObjectType >= 5) { + throw new Error(`Unsupported or likely invalid AAC profile (MPEG-4 Audio Object Type): ${aacObjectType}`); + } + + const sampleRateIndex: number = reader.readBits(4); + if (sampleRateIndex < 0 || sampleRateIndex >= ADTS_SAMPLE_RATES.length) { + throw new Error(`Invalid AAC sampling-frequency index: ${sampleRateIndex}`); + } + const sampleRate = ADTS_SAMPLE_RATES[sampleRateIndex]; + + // private bit (unused by spec) + reader.skipBits(1); + + const channelsConf = reader.readBits(3); + if (channelsConf <= 0 || channelsConf >= 8) { + throw new Error(`Channel configuration invalid value: ${channelsConf}`); } + const channels = channelsConf; - this.frameDuration = (1000000 * 1024) / this.sampleRate; + // originality/home/copyright bits (ignoring) + reader.skipBits(4); + + // ADTS frame len including the header itself (also opt CRC 2 bytes). + const adtsFrameLen = reader.readBits(13); + + if (adtsFrameLen <= 0) { + throw new Error(`Invalid ADTS-frame byte-length: ${adtsFrameLen}`); + } - aacHeaderParser.skipBits(1); - this.channels = aacHeaderParser.readBits(3); + const accessUnitSize = adtsFrameLen - headerLen; - aacHeaderParser.skipBits(4); - this.currentFrameSize = aacHeaderParser.readBits(13) - AdtsReader.ADTS_HEADER_SIZE - - AdtsReader.ADTS_SYNC_SIZE; + // Buffer fullness, states the bit-reservoir per frame. + reader.skipBits(11); - if (hasCrc) { - this.currentFrameSize -= AdtsReader.ADTS_CRC_SIZE; + // Number of AAC frames (RDBs (Raw Data Blocks)) in ADTS frame minus 1. 
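+        // (the 2-bit field stores the count minus one, hence the + 1 on the read below)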
+ // 1 ADTS frame can contain up to 4 AAC frames + const numFrames = reader.readBits(2) + 1; + if (numFrames <= 0) { + throw new Error(`Invalid number of AAC frames for ADTS header: ${numFrames}`); } - this.state = State.READ_FRAME; + this._currentFrame = { + headerLen, + accessUnitSize, + channels, + aacObjectType, + sampleRate, + numFrames + }; } } diff --git a/src/demuxer/ts/payload/h264-reader.ts b/src/demuxer/ts/payload/h264-reader.ts index a43a019..ddacc33 100644 --- a/src/demuxer/ts/payload/h264-reader.ts +++ b/src/demuxer/ts/payload/h264-reader.ts @@ -2,62 +2,35 @@ import { BitReader } from '../../../utils/bit-reader'; import { PayloadReader } from './payload-reader'; import { Frame } from '../../frame'; import { Track } from '../../track'; -import { SPSParser } from '../../../codecs/h264/sps-parser'; -import { Sps } from '../../../codecs/h264/nal-units'; - -enum NAL_UNIT_TYPE { - SLICE = 1, - DPA, - DPB, - DPC, - IDR, - SEI, - SPS, - PPS, - AUD, - END_SEQUENCE, - END_STREAM -} - -enum SLICE_TYPE { - P = 0, - B, - I, - SP, - SI -} +import { H264ParameterSetParser } from '../../../codecs/h264/param-set-parser'; +import { FRAME_TYPE, mapNaluSliceToFrameType, NAL_UNIT_TYPE, SLICE_TYPE, Sps } from '../../../codecs/h264/nal-units'; -export class Fraction { - constructor(public num: number, public den: number) { - // do nothing - } -} +const NALU_DELIM_LEN = 3; export class H264Reader extends PayloadReader { - public sps: Sps; - public pps: boolean; - public pendingBytes: number; - constructor() { - super(); - this.pendingBytes = 0; - } + private _pendingBytes: number = 0; + + public sps: Sps = null; + public pps: boolean = false; public getMimeType(): string { return Track.MIME_TYPE_AVC; } - public flush(pts: number): void { - if (this.dataBuffer.byteLength > 0) { - this.consumeData(pts); + public flush(dts: number, cto: number): void { - if (this.dataBuffer.byteLength > 0) { - const offset: number = this.findNextNALUnit(0); - if (offset < this.dataBuffer.byteLength) { - this.processNALUnit(offset, this.dataBuffer.byteLength, this.dataBuffer[offset + 3] & 0x1F); - } - } + this.read(dts, cto); + + // enforced process any last data after + // a nalu-delim to be processed + // (most likely partial NALUs). + const nextNalUnit: number = this._findNextNalu(0); + if (!Number.isFinite(nextNalUnit)) { + return; } + + this._readNaluData(nextNalUnit, this.dataBuffer.byteLength); } public reset(): void { @@ -67,179 +40,129 @@ export class H264Reader extends PayloadReader { this.pps = false; } - public consumeData(pts: number): void { - if (!this.dataBuffer) { + public read(dts: number, cto: number): void { + // it is expected after this check a dataBuffer exists + if (!(this?.dataBuffer?.byteLength)) { return; + // throw new Error('read() should not be called without priorly data appended'); } - if (this.firstTimestamp === -1) { - this.timeUs = this.firstTimestamp = pts; - } + this.setCurrentTime(dts, cto); - // process any possible reminding data + let firstNalUnit: number = 0; let nextNalUnit: number = 0; - let offset: number = 0; - if (this.pendingBytes) { - nextNalUnit = this.findNextNALUnit(this.pendingBytes); - if (nextNalUnit < this.dataBuffer.byteLength) { - this.processNALUnit(0, nextNalUnit, this.dataBuffer[offset + 3] & 0x1F); - offset = nextNalUnit; + + if (this._pendingBytes > 0) { + nextNalUnit = this._findNextNalu(this._pendingBytes); + // if we cant find a next NALU-delim from the remainder data, + // we can already give-up here. 
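+            // (_findNextNalu() returns NaN when no 00 00 01 start-code is found)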
+ if (!Number.isFinite(nextNalUnit)) { + return; } - this.pendingBytes = 0; + this._readNaluData(firstNalUnit, nextNalUnit); + firstNalUnit = nextNalUnit; } else { - offset = this.findNextNALUnit(0); + firstNalUnit = this._findNextNalu(); + if (!Number.isFinite(firstNalUnit)) { + return; + } } // process next nal units in the buffer - if (pts !== -1) { - this.timeUs = pts; - } - - if (this.dataBuffer.byteLength > 0) { - while (nextNalUnit < this.dataBuffer.byteLength) { - nextNalUnit = this.findNextNALUnit(offset + 3); - if (nextNalUnit < this.dataBuffer.byteLength) { - this.processNALUnit(offset, nextNalUnit, this.dataBuffer[offset + 3] & 0x1F); - offset = nextNalUnit; - } + while (true) { + // w/o the +3 we would end up again with the input offset! + nextNalUnit = this._findNextNalu(firstNalUnit + NALU_DELIM_LEN); + if (!Number.isFinite(nextNalUnit)) { + break; } - this.dataBuffer = this.dataBuffer.subarray(offset); - this.pendingBytes = this.dataBuffer.byteLength; - } - } - private findNextNALUnit(index: number): number { - const limit: number = this.dataBuffer.byteLength - 3; - for (let i: number = index; i < limit; i++) { - if (this.dataBuffer[i] === 0 && this.dataBuffer[i + 1] === 0 && this.dataBuffer[i + 2] === 1) { - return i; - } + this._readNaluData(firstNalUnit, nextNalUnit); + firstNalUnit = nextNalUnit; } - return this.dataBuffer.byteLength; - } + // prune data-buffer + this.dataBuffer = this.dataBuffer.subarray(firstNalUnit); - private processNALUnit(start: number, limit: number, nalType: number): void { - if (nalType === NAL_UNIT_TYPE.SPS) { - this.parseSPSNALUnit(start, limit); - } else if (nalType === NAL_UNIT_TYPE.PPS) { - this.pps = true; - } else if (nalType === NAL_UNIT_TYPE.AUD) { - this.parseAUDNALUnit(start, limit); - } else if (nalType === NAL_UNIT_TYPE.IDR) { - this.addNewFrame(Frame.IDR_FRAME, limit - start); - } else if (nalType === NAL_UNIT_TYPE.SEI) { - this.parseSEINALUnit(start, limit); - } else if (nalType === NAL_UNIT_TYPE.SLICE) { - this.parseSliceNALUnit(start, limit); - } - } - - private parseSPSNALUnit(start: number, limit: number): void { - this.sps = SPSParser.parseSPS(this.dataBuffer.subarray(start + 4, limit)); + // we need to make sure the next read starts off + // ahead the last parsed NALU-delimiter. 
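+        // (the next read() resumes the start-code scan at this offset,
+        // i.e. only within newly appended bytes)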
+ this._pendingBytes = this.dataBuffer.byteLength; } - private skipScalingList(parser: BitReader, size: number): void { - let lastScale: number = 8; - let nextScale: number = 8; - for (let i: number = 0; i < size; i++) { - if (nextScale !== 0) { - const deltaScale: number = parser.readSEG(); - nextScale = (lastScale + deltaScale + 256) % 256; - } - if (nextScale !== 0) { - lastScale = nextScale; - } + private _findNextNalu(offset: number = 0): number { + if (!(this?.dataBuffer?.byteLength)) { + return NaN; } - } - private parseSEINALUnit(start: number, limit: number): void { - let seiParser: BitReader = new BitReader(this.dataBuffer.subarray(start, limit)); - seiParser.skipBytes(4); - - while (seiParser.remainingBytes() > 0) { - const data: number = seiParser.readByte(); - if (data !== 0xFF) { - break; + const length: number = this.dataBuffer.byteLength - NALU_DELIM_LEN; + for (let i: number = offset; i < length; i++) { + if (this.dataBuffer[i] === 0 + && this.dataBuffer[i + 1] === 0 + && this.dataBuffer[i + 2] === 1) { + return i; } } - - // parse payload size - while (seiParser.remainingBytes() > 0) { - const data: number = seiParser.readByte(); - if (data !== 0xFF) { - break; - } + return NaN; + } + + /** + * @param begin offset (inclusive) + * @param end offset (exclusive) + * @returns end offset (exclusive) as input + */ + private _readNaluData(begin: number, end: number) { + + const naluData = this.dataBuffer.subarray(begin + NALU_DELIM_LEN, end); + + // TODO: check for invalid values + // (can happen if buffer begin/remainder is garbage, + // assert transport parsing is correct, but also handle packet loss) + + const naluType = naluData[0] & 0x1F; + switch(naluType) { + case NAL_UNIT_TYPE.AUD: + break; + case NAL_UNIT_TYPE.SLICE: + this._parseNonIdrPicSlice(naluData); + break; + case NAL_UNIT_TYPE.IDR: + this._addFrame(FRAME_TYPE.I, naluData); + break; + case NAL_UNIT_TYPE.SPS: + this._parseSps(naluData); + break; + case NAL_UNIT_TYPE.PPS: + this.pps = true; + break; + default: + break; } - seiParser.destroy(); - seiParser = null; + this.onData(naluData, this.dts, this.cto, naluType); } - private parseSliceNALUnit(start: number, limit: number): void { - let sliceParser: BitReader = new BitReader(this.dataBuffer.subarray(start, limit)); - sliceParser.skipBytes(4); - sliceParser.readUEG(); - const sliceType: number = sliceParser.readUEG(); - const type: string = this.getSliceTypeName(sliceType); - if (this.sps && this.pps) { - this.addNewFrame(type, limit - start); - } else { - // console.warn('Slice ' + type + ' received without sps/pps been set'); - } - sliceParser.destroy(); - sliceParser = null; + private _parseSps(naluData: Uint8Array): void { + // skip first byte NALU-header for SPS-parser func input (expects only payload) + this.sps = H264ParameterSetParser.parseSPS(naluData.subarray(1)); } - private parseAUDNALUnit(start: number, limit: number): void { - // const audParser: BitReader = new BitReader(this.dataBuffer.subarray(start, limit)); - // audParser.skipBytes(4); - } + private _parseNonIdrPicSlice(naluData: Uint8Array): void { + const sliceParser: BitReader = new BitReader(naluData); - private getSliceTypeName(sliceType: number): string { - if (sliceType > 4) { - sliceType = sliceType - 5; - } - switch (sliceType) { - case SLICE_TYPE.B: - return Frame.B_FRAME; - case SLICE_TYPE.I: - return Frame.IDR_FRAME; - case SLICE_TYPE.P: - return Frame.P_FRAME; - case SLICE_TYPE.SI: - return 'SI'; - case SLICE_TYPE.SP: - return 'SP'; - default: - return 'Unknown'; - } - } + 
sliceParser.skipBytes(1); + sliceParser.readUEG(); + const sliceType: SLICE_TYPE = sliceParser.readUEG(); - private getNALUnitName(nalType: number): string { - switch (nalType) { - case NAL_UNIT_TYPE.SLICE: - return 'SLICE'; - case NAL_UNIT_TYPE.SEI: - return 'SEI'; - case NAL_UNIT_TYPE.PPS: - return 'PPS'; - case NAL_UNIT_TYPE.SPS: - return 'SPS'; - case NAL_UNIT_TYPE.AUD: - return 'AUD'; - case NAL_UNIT_TYPE.IDR: - return 'IDR'; - case NAL_UNIT_TYPE.END_SEQUENCE: - return 'END SEQUENCE'; - case NAL_UNIT_TYPE.END_STREAM: - return 'END STREAM'; - default: - return 'Unknown'; - } + this._addFrame(mapNaluSliceToFrameType(sliceType), naluData); } - private addNewFrame(frameType: string, frameSize: number): void { - this.frames.push(new Frame(frameType, this.timeUs, frameSize)); + private _addFrame(frameType: FRAME_TYPE, naluData: Uint8Array): void { + const frame = new Frame( + frameType, + this.dts, + this.cto, + 0, + naluData.byteLength, + ); + this.frames.push(frame); } } diff --git a/src/demuxer/ts/payload/id3-reader.ts b/src/demuxer/ts/payload/id3-reader.ts index ad71d00..a9fb0d6 100644 --- a/src/demuxer/ts/payload/id3-reader.ts +++ b/src/demuxer/ts/payload/id3-reader.ts @@ -7,7 +7,7 @@ export class ID3Reader extends PayloadReader { return Track.MIME_TYPE_ID3; } - public consumeData(pts: number): void { + public read(pts: number): void { // do nothing } } diff --git a/src/demuxer/ts/payload/mpeg-reader.ts b/src/demuxer/ts/payload/mpeg-reader.ts index 4984655..b013bbc 100644 --- a/src/demuxer/ts/payload/mpeg-reader.ts +++ b/src/demuxer/ts/payload/mpeg-reader.ts @@ -1,6 +1,8 @@ import ByteParserUtils from '../../../utils/byte-parser-utils'; import { PayloadReader } from './payload-reader'; import { Frame } from '../../frame'; +import { FRAME_TYPE } from '../../../codecs/h264/nal-units'; +import { MPEG_CLOCK_HZ } from '../../../utils/timescale'; enum State { FIND_SYNC = 1, @@ -51,16 +53,12 @@ export class MpegReader extends PayloadReader { return 'audio/' + this.mimeType; } - public consumeData(pts: number): void { + public read(dts: number, cto: number): void { if (!this.dataBuffer) { - return; - } - if (pts >= 0) { - this.timeUs = pts; - } - if (this.firstTimestamp === -1) { - this.firstTimestamp = this.timeUs; + throw new Error('read() should not be called without priorly data appended'); } + this.setCurrentTime(dts, cto); + while (this.dataOffset < this.dataBuffer.byteLength) { if (this.state === State.FIND_SYNC) { this.findHeader(); @@ -116,7 +114,6 @@ export class MpegReader extends PayloadReader { private parseHeader(header: number): boolean { if ((header & 0xFFE00000) >>> 0 !== 0xFFE00000) { - console.log(header); return false; } @@ -166,7 +163,8 @@ export class MpegReader extends PayloadReader { this.currentFrameSize = Math.floor(this.samplesPerFrame * (this.bitrate * 1000 / 8) / this.sampleRate) + padding; } } - this.frameDuration = (1000000 * this.samplesPerFrame) / this.sampleRate; + + this.frameDuration = (MPEG_CLOCK_HZ * this.samplesPerFrame) / this.sampleRate; return true; } @@ -175,9 +173,16 @@ export class MpegReader extends PayloadReader { if ((this.dataBuffer.byteLength - this.dataOffset) < (MpegReader.HEADER_SIZE + this.currentFrameSize)) { return 0; } + + this.frames.push(new Frame( + FRAME_TYPE.NONE, + this.dts, + this.cto, + this.frameDuration, + this.currentFrameSize + )); + this.state = State.FIND_SYNC; - this.frames.push(new Frame(Frame.IDR_FRAME, this.timeUs, this.currentFrameSize)); - this.timeUs = this.timeUs + this.frameDuration; return MpegReader.HEADER_SIZE + 
this.currentFrameSize; } } diff --git a/src/demuxer/ts/payload/payload-reader.ts b/src/demuxer/ts/payload/payload-reader.ts index 9fe266d..855e440 100644 --- a/src/demuxer/ts/payload/payload-reader.ts +++ b/src/demuxer/ts/payload/payload-reader.ts @@ -1,20 +1,63 @@ import { BitReader } from '../../../utils/bit-reader'; import { Frame } from '../../frame'; -export class PayloadReader { - public firstTimestamp: number = -1; - public timeUs: number = -1; +export abstract class PayloadReader { + public frames: Frame[] = []; public dataBuffer: Uint8Array; - protected dataOffset: number; + protected dataOffset: number = 0; + + private _currentTime: [number, number] = [NaN, NaN]; + private _firstDts: number = NaN; + + private _pusiCount: number = 0; + private _lastPusiFramesLen: number = 0; + + constructor() { + this.reset(); + } + + get dts() { return this._currentTime[0] } + get cto() { return this._currentTime[1] } + + public abstract read(dts: number, cto: number): void; + + public onData(data: Uint8Array, dts: number, cto: number, naluType?: number) {} + + public getMimeType(): string { + return 'Unknown'; + } + + public getPusiCount() { + return this._pusiCount; + } + + public setCurrentTime(dts: number, cto: number) { + if (Number.isNaN(this._firstDts)) { + this._firstDts = dts; + } + this._currentTime = [dts, cto]; + } + + public getCurrentTime() { + return this._currentTime; + } + + public append(packet: BitReader, payloadUnitStartIndicator: boolean): void { + + if (payloadUnitStartIndicator) { + this._pusiCount++; + this._lastPusiFramesLen = this.frames.length; + } + + const packetReaderOffset = packet.bytesOffset(); + const dataToAppend: Uint8Array = packet.buffer.subarray(packetReaderOffset); - public append(packet: BitReader): void { - const dataToAppend: Uint8Array = packet.buffer.subarray(packet.bytesOffset()); if (!this.dataBuffer) { this.dataBuffer = dataToAppend; } else { - const newLen: number = this.dataBuffer.byteLength + packet.remainingBytes(); + const newLen: number = this.dataBuffer.byteLength + dataToAppend.byteLength; const temp: Uint8Array = new Uint8Array(newLen); temp.set(this.dataBuffer, 0); temp.set(dataToAppend, this.dataBuffer.byteLength); @@ -23,37 +66,41 @@ export class PayloadReader { } public reset(): void { - this.frames = []; + this.frames.length = 0; + this._pusiCount = 0; + this._lastPusiFramesLen = 0; this.dataOffset = 0; - this.firstTimestamp = -1; - this.timeUs = -1; + this.dataBuffer = null; } - public flush(pts: number): void { + public flush(dts: number, cto: number): void { if (this.dataBuffer && this.dataBuffer.byteLength > 0) { - this.consumeData(pts); + this.read(dts, cto); this.dataBuffer = null; } this.dataOffset = 0; } - public consumeData(pts: number): void { - throw new Error('Should have implemented this'); - } + public popFrames(wholePayloadUnits: boolean = true): Frame[] { + // determine number of frames to splice + let numFrames = wholePayloadUnits ? 
this._lastPusiFramesLen : this.frames.length; + // early return shortcut opti: + if (numFrames === 0) return []; - public getMimeType(): string { - return 'Unknown'; - } + // split-slice frame-list: + // returns slice to pop, mutates list to remainder (deletes sliced items) + const frames = this.frames.splice(0, numFrames); - public getDuration(): number { - return this.getLastPTS() - this.getFirstPTS(); - } - - public getFirstPTS(): number { - return this.firstTimestamp; - } + // reset pusi related counters: + // + // note: prior bug would erraticaly set this to remainder length, + // which would cause popFrames to return frames not yet completed in buffer, + // thus bringing frame output and actual payload out of whack and + // therefore making the assumptions upon PES packet segmentation made on parsing input + // to fail in runtime assertions. + this._lastPusiFramesLen = 0; + this._pusiCount = 0; - public getLastPTS(): number { - return this.timeUs; + return frames; } } diff --git a/src/demuxer/ts/payload/pes-header.ts b/src/demuxer/ts/payload/pes-header.ts new file mode 100644 index 0000000..ad8cc3f --- /dev/null +++ b/src/demuxer/ts/payload/pes-header.ts @@ -0,0 +1,93 @@ +import { BitReader } from "../../../utils/bit-reader"; + +/** + * + * @param {BitReader} packet PES packet-reader aligned to start of optional header section. + * @returns [dts, pts] + */ +export function parsePesHeaderOptionalFields(packet: BitReader): [number, number] { + + /* + Marker bits 2 10 binary or 0x2 hex + Scrambling control 2 00 implies not scrambled + Priority 1 + Data alignment indicator 1 1 indicates that the PES packet header is immediately followed by the video start code or audio syncword + Copyright 1 1 implies copyrighted + Original or Copy 1 1 implies original + */ + packet.skipBytes(1); // todo: parse the data-alignment idc + + /* + PTS DTS indicator 2 11 = both present, 01 is forbidden, 10 = only PTS, 00 = no PTS or DTS + ESCR flag 1 + ES rate flag 1 + DSM trick mode flag 1 + Additional copy info flag 1 + CRC flag 1 + extension flag 1 + */ + const ptsDtsFlags = packet.readByte(); + + // PES header length 8 gives the length of the remainder of the PES header in bytes + let headerRemainderLen = packet.readByte(); + + // The extension header size has variable length based on the present flags. + // We need to keep track how many bytes we will effectively read, + // as this will vary, in order to skip the remaining non-read bytes in an easy way, + // without having to treat all the possible flags and field lengths cases. + let packetBytesRemaining = packet.remainingBytes(); + + /* + Optional fields variable length presence is determined by flag bits above + Stuffing Bytes variable length 0xff + */ + + /** + * Thanks to Videojs/Muxjs for this bit, which does well the + * trick around 32-bit unary bit-ops and 33 bit numbers :) + * -> See https://github.com/videojs/mux.js/blob/87f777f718b264df69a063847fe0fb9b5e0aaa6c/lib/m2ts/m2ts.js#L333 + */ + // PTS and DTS are normally stored as a 33-bit number. Javascript + // performs all bitwise operations on 32-bit integers but javascript + // supports a much greater range (52-bits) of integer using standard + // mathematical operations. + // We construct a 31-bit value using bitwise operators over the 31 + // most significant bits and then multiply by 4 (equal to a left-shift + // of 2) before we add the final 2 least significant bits of the + // timestamp (equal to an OR.) 
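+    // Example: a timestamp of 2^32 ticks @ 90 kHz (~47722 s) would overflow 32-bit
+    // bitwise math, but assembled this way it stays well within JS number precision.
+    // NaN below means the respective timestamp is absent from this PES header.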
+ let pts = NaN; + let dts = NaN; + if (ptsDtsFlags & 0xC0) { + + // the PTS and DTS are not written out directly. For information + // on how they are encoded, see + // http://dvd.sourceforge.net/dvdinfo/pes-hdr.html + let lastByte; + pts = (packet.readByte() & 0x0E) << 27 | + (packet.readByte() & 0xFF) << 20 | + (packet.readByte() & 0xFE) << 12 | + (packet.readByte() & 0xFF) << 5 | + ((lastByte = packet.readByte()) & 0xFE) >>> 3; + pts *= 4; // Left shift by 2 + pts += (lastByte & 0x06) >>> 1; // OR by the two LSBs + dts = pts; + if (ptsDtsFlags & 0x40) { + dts = (packet.readByte() & 0x0E) << 27 | + (packet.readByte() & 0xFF) << 20 | + (packet.readByte() & 0xFE) << 12 | + (packet.readByte() & 0xFF) << 5 | + ((lastByte = packet.readByte()) & 0xFE) >>> 3; + dts *= 4; // Left shift by 2 + dts += (lastByte & 0x06) >>> 1; // OR by the two LSBs + } + } + + // count the bytes read since the timing section start + packetBytesRemaining -= packet.remainingBytes(); + // subtract the read bytes from the header len read before + headerRemainderLen -= packetBytesRemaining; + // skip the bytes to point packet to data section + packet.skipBytes(headerRemainderLen); + + return [dts, pts]; +} diff --git a/src/demuxer/ts/payload/unknown-reader.ts b/src/demuxer/ts/payload/unknown-reader.ts index 554d89d..d1a89d1 100644 --- a/src/demuxer/ts/payload/unknown-reader.ts +++ b/src/demuxer/ts/payload/unknown-reader.ts @@ -6,7 +6,7 @@ export class UnknownReader extends PayloadReader { return 'unknown'; } - public consumeData(pts: number): void { + public read(pts: number): void { // do nothing } } diff --git a/src/demuxer/ts/pes-reader.ts b/src/demuxer/ts/pes-reader.ts index 48b2f64..17d3a7e 100644 --- a/src/demuxer/ts/pes-reader.ts +++ b/src/demuxer/ts/pes-reader.ts @@ -1,89 +1,123 @@ import { BitReader } from '../../utils/bit-reader'; + import { PayloadReader } from './payload/payload-reader'; import { UnknownReader } from './payload/unknown-reader'; import { AdtsReader } from './payload/adts-reader'; import { H264Reader } from './payload/h264-reader'; import { ID3Reader } from './payload/id3-reader'; import { MpegReader } from './payload/mpeg-reader'; +import { parsePesHeaderOptionalFields } from './payload/pes-header'; +import { mpegClockTimeToSecs } from '../../utils/timescale'; + +export enum MptsElementaryStreamType { + TS_STREAM_TYPE_AAC = 0x0F, + TS_STREAM_TYPE_H264 = 0x1B, + TS_STREAM_TYPE_ID3 = 0x15, + TS_STREAM_TYPE_MPA = 0x03, + TS_STREAM_TYPE_MPA_LSF = 0x04, + TS_STREAM_TYPE_METADATA = 0x15, + TS_STREAM_TYPE_PACKETIZED_DATA = 0x06 +} + +const MP4_BASE_MEDIA_DTS_32BIT_RANGE = Math.pow(2, 32) - 1; export class PESReader { - public static TS_STREAM_TYPE_AAC: number = 0x0F; - public static TS_STREAM_TYPE_H264: number = 0x1B; - public static TS_STREAM_TYPE_ID3: number = 0x15; - public static TS_STREAM_TYPE_MPA: number = 0x03; - public static TS_STREAM_TYPE_MPA_LSF: number = 0x04; - public static TS_STREAM_TYPE_METADATA: number = 0x06; public payloadReader: PayloadReader; - private lastPts: number; - private pesLength: number; + private currentDts: number = NaN; + private currentCto: number = NaN; - constructor(public pid: number, public type: number) { - this.pid = pid; - this.type = type; - this.lastPts = -1; - this.pesLength = 0; + constructor(public pid: number, public type: MptsElementaryStreamType, + private _timeWrapOver32BitMp4Range: boolean) { - if (type === PESReader.TS_STREAM_TYPE_AAC) { + switch(type) { + case MptsElementaryStreamType.TS_STREAM_TYPE_AAC: this.payloadReader = new AdtsReader(); - 
} else if (type === PESReader.TS_STREAM_TYPE_H264) { + break; + case MptsElementaryStreamType.TS_STREAM_TYPE_H264: this.payloadReader = new H264Reader(); - } else if (type === PESReader.TS_STREAM_TYPE_ID3) { + break; + case MptsElementaryStreamType.TS_STREAM_TYPE_ID3: this.payloadReader = new ID3Reader(); - } else if (type === PESReader.TS_STREAM_TYPE_MPA || type === PESReader.TS_STREAM_TYPE_MPA_LSF) { + break; + case MptsElementaryStreamType.TS_STREAM_TYPE_MPA: + case MptsElementaryStreamType.TS_STREAM_TYPE_MPA_LSF: this.payloadReader = new MpegReader(); - } else if (type === PESReader.TS_STREAM_TYPE_METADATA) { - // do nothing - } else { + break; + case MptsElementaryStreamType.TS_STREAM_TYPE_METADATA: + case MptsElementaryStreamType.TS_STREAM_TYPE_PACKETIZED_DATA: + break; + default: this.payloadReader = new UnknownReader(); + break; } + + this.payloadReader.onData = this._handlePayloadReadData.bind(this); } - public static ptsToTimeUs(pts: number): number { - return (pts * 1000000) / 90000; + public getStreamTypeName(): string { + return MptsElementaryStreamType[this.type]; + } + + public reset(): void { + this.payloadReader.reset(); + } + + public flush(): void { + this.payloadReader.flush(this.currentDts, this.currentCto); } - public appendData(payloadUnitStartIndicator: boolean, packet: BitReader): void { + /** + * Can be overriden instance-wise in userland as in "cheap and fast" event-target. + */ + public onPayloadData(data: Uint8Array, dts: number, cto: number, naluType: number) {} + + /** + * Expects a TS packet-reader aligned on its respective payload section. + */ + public appendPacket(payloadUnitStartIndicator: boolean, packet: BitReader): void { + // a packet with PUSI flag starts with a PES header. + // reading it will update our internal DTS/CTO timing state to the current + // payload unit ie frame(s) contained within. if (payloadUnitStartIndicator) { - if (this.payloadReader) { - this.payloadReader.consumeData(this.lastPts); - } - this.parsePESHeader(packet); - } + // Q: call read before (if data buffer is filled) + // to take out timing alignment concern from payloadReader ? - if (this.payloadReader) { - this.payloadReader.append(packet); + // post: dts/cto updated, packet-reader aligned to payload data section + this._readHeader(packet); } + // append to payload buffer (super-class generic method) + this.payloadReader.append(packet, payloadUnitStartIndicator); + // call the reader impl + this.payloadReader.read(this.currentDts, this.currentCto); } - public parsePESHeader(packet: BitReader): void { - packet.skipBytes(7); - const timingFlags: number = packet.readByte(); - if (timingFlags & 0xC0) { - packet.skipBytes(1); - let pts: number; - pts = (packet.readByte() & 0x0E) << 27 | - (packet.readByte() & 0xFF) << 20 | - (packet.readByte() & 0xFE) << 12 | - (packet.readByte() & 0xFF) << 5; - const val: number = packet.readByte(); - pts |= (val & 0xFE) >>> 3; - pts = pts << 2; - pts += (val & 0x06) >>> 1; - this.lastPts = PESReader.ptsToTimeUs(pts); + private _readHeader(packet: BitReader): void { + + const readStartCode = packet.readBits(24) === 1; + if (!readStartCode) { + throw new Error(`No start-code found parsing PES header`); } - } + const streamId = packet.readByte(); + const pesPacketLen = ((packet.readByte() << 8) | packet.readByte()); - public reset(): void { - if (this.payloadReader) { - this.payloadReader.reset(); + // parses the optional header section. + // reads the packet up to the data section in every case. 
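+        // (returned as [dts, pts]; when only a PTS is signalled, dts === pts,
+        // so the CTO computed below is 0)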
+        const [dts, pts] = parsePesHeaderOptionalFields(packet);
+
+        const cto = pts - dts;
+        if (cto < 0) {
+            throw new Error(`Computed CTO < 0 with DTS = ${dts} (${mpegClockTimeToSecs(dts)} [s]) / PTS = ${pts} (${mpegClockTimeToSecs(pts)} [s])`);
         }
+
+        this.currentDts = this._timeWrapOver32BitMp4Range ? dts % MP4_BASE_MEDIA_DTS_32BIT_RANGE : dts;
+        this.currentCto = cto;
     }
 
-    public flush(): void {
-        if (this.payloadReader) {
-            this.payloadReader.flush(this.lastPts);
-        }
+    private _handlePayloadReadData(data: Uint8Array, dts: number, cto: number, naluType: number = NaN) {
+        if (!this.payloadReader.frames.length) return;
+
+        this.onPayloadData(data, dts, cto, naluType);
     }
 }
diff --git a/src/demuxer/ts/psi-header.ts b/src/demuxer/ts/psi-header.ts
new file mode 100644
index 0000000..40b13e9
--- /dev/null
+++ b/src/demuxer/ts/psi-header.ts
@@ -0,0 +1,44 @@
+import { BitReader } from '../../utils/bit-reader';
+
+/**
+ * Parses the PSI header up to the start of the table-data section and returns the number of table-data bytes that follow, excluding the trailing CRC.
+ * @param packetParser
+ * @param payloadUnitStartIndicator
+ * @returns Section length minus 9 (5 bytes of syntax section + 4 bytes of trailing CRC32) = table-data section size
+ */
+export function parsePsiPacketHeader(packetParser: BitReader, payloadUnitStartIndicator: boolean): number {
+    // PSI structure start: read pointer field and skip filler bytes
+    if (payloadUnitStartIndicator) {
+        packetParser.skipBytes(packetParser.readByte());
+    }
+    /**
+    Table ID (8 bits)
+    Section syntax indicator (1 bit)
+    Private bit (1 bit): the PAT, PMT and CAT all set this to 0; other tables set this to 1.
+    Reserved bits (2 bits): set to 0b11 (all bits on).
+    */
+    packetParser.skipBits(12); // skip the PSI header fields before the section length (we expect the table ID to be valid and the syntax section to always be present)
+    packetParser.skipBits(2); // Section length unused bits
+    const sectionLength: number = packetParser.readBits(10); // Section length
+
+    /*
+    Table ID extension (16 bits): informational-only identifier. The PAT uses this for the transport-stream identifier, the PMT for the program number.
+    Reserved bits (2 bits): set to 0b11 (all bits on).
+    Version number (5 bits): syntax version number. Incremented whenever the table data changes, wrapping around to 0 after 31.
+    Current/next indicator (1 bit): indicates whether the data is currently in effect or is for future use. If the bit is set, the data applies right now.
+    Section number (8 bits): index of this table within a related sequence of tables, starting from 0.
+    Last section number (8 bits): index of the last table in the sequence.
+
+    => 5 bytes
+
+    */
+
+    packetParser.skipBytes(5); // skip the remaining syntax-section fields preceding the table data
+
+    /*
+    Table data (N*8 bits): data as defined by the table identifier.
+    CRC32 (32 bits): a checksum of the entire table excluding the pointer field, pointer filler bytes and the trailing CRC32 itself.
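+
+    Worked example (a sketch of the arithmetic below): a section_length of 31 means
+    31 - 5 (rest of the syntax section) - 4 (CRC32) = 22 bytes of table data follow,
+    which is the value returned to the caller.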
+ */ + + return sectionLength - 9; +} diff --git a/src/demuxer/ts/ts-track.ts b/src/demuxer/ts/ts-track.ts index adf7e0d..a8d02d2 100644 --- a/src/demuxer/ts/ts-track.ts +++ b/src/demuxer/ts/ts-track.ts @@ -1,53 +1,31 @@ -import { Track } from '../track'; +import { Track, TrackType } from '../track'; import { Frame } from '../frame'; import { PESReader } from './pes-reader'; -import { H264Reader } from './payload/h264-reader'; -import { AdtsReader } from './payload/adts-reader'; -import { Sps } from '../../codecs/h264/nal-units'; -export class TSTrack extends Track { - constructor(id: number, type: string, mimeType: string, public pes: PESReader) { +export class MpegTSTrack extends Track { + + constructor(id: number, type: TrackType, mimeType: string, + public pes: PESReader) { + super(id, type, mimeType); } - public getDuration(): number { - if (this.pes && this.pes.payloadReader) { - return this.pes.payloadReader.getDuration(); - } - return 0; + public toJSON() { + const { id, type, mimeType } = this; + return { + id, + type, + mimeType + }; } - public getFrames(): Frame[] { - if (this.pes && this.pes.payloadReader) { - return this.pes.payloadReader.frames; - } - return []; + get frames() { return this?.pes?.payloadReader.frames || []; } + + getResolution(): [number, number] { + return [0, 0]; } - public getMetadata(): {} { - if (this.pes && this.pes.payloadReader) { - if (this.pes.payloadReader instanceof H264Reader && (this.pes.payloadReader as H264Reader).sps) { - const sps: Sps = (this.pes.payloadReader as H264Reader).sps; - return { - profile: sps.profile, - level: sps.level, - bitDepth: sps.bitDepth, - chromaFormat: sps.chromaFormat, - frameRate: sps.frameRate, - sar: sps.sar, - codecSize: sps.codecSize, - presentSize: - sps.presentSize, - }; - } else if (this.pes.payloadReader instanceof AdtsReader) { - const adtsReader: AdtsReader = this.pes.payloadReader as AdtsReader; - return { - channels: adtsReader.channels, - sampleRate: adtsReader.sampleRate, - frameDuration: adtsReader.frameDuration, - }; - } - } - return {}; + popFrames(wholePayloadUnits: boolean = true): Frame[] { + return this.pes?.payloadReader?.popFrames(wholePayloadUnits) || []; } } diff --git a/src/demuxer/webm/webm-demuxer.ts b/src/demuxer/webm/webm-demuxer.ts index 7fdf0dc..c8cdf5d 100644 --- a/src/demuxer/webm/webm-demuxer.ts +++ b/src/demuxer/webm/webm-demuxer.ts @@ -38,19 +38,14 @@ export class WebMDemuxer implements IDemuxer { } this.elements = this.parseElements(this.data.byteLength); - console.log(this.elements); if (this.dataOffset > 0) { this.data = this.data.subarray(this.dataOffset); this.dataOffset = 0; } - - this.updateTracks(); } - public end(): void { - this.updateTracks(); - } + public end(): void {} private parseElements(end: number): EbmlElement[] { const elements: EbmlElement[] = []; @@ -205,12 +200,4 @@ export class WebMDemuxer implements IDemuxer { } return null; } - - private updateTracks(): void { - for (const trackId in this.tracks) { - if (this.tracks.hasOwnProperty(trackId)) { - this.tracks[trackId].update(); - } - } - } } diff --git a/src/demuxer/webm/webm-track.ts b/src/demuxer/webm/webm-track.ts index 885cbdc..d3cb1d1 100644 --- a/src/demuxer/webm/webm-track.ts +++ b/src/demuxer/webm/webm-track.ts @@ -1,10 +1,16 @@ -import { Track } from '../track'; +import ByteParserUtils from '../../utils/byte-parser-utils'; +import { FRAME_TYPE } from '../../codecs/h264/nal-units'; + +import { Track, TrackType } from '../track'; import { Frame } from '../frame'; + import { ITrackInfo } from 
'./elements/track-info'; import { Vint, EbmlElement } from './ebml/ebml-types'; -import ByteParserUtils from '../../utils/byte-parser-utils'; export class WebMTrack extends Track { + + private _frames: Frame[]; + private lastPts: number; private nsPerFrame: number; private lastTimecodeBase: number; @@ -13,44 +19,42 @@ export class WebMTrack extends Track { private metadata: any; constructor(info: ITrackInfo, metadata: any) { - const type: string = WebMTrack.getType(info.TrackType); + const type = WebMTrack.getType(info.TrackType); const codec: string = info.CodecName || WebMTrack.getCodecNameFromID(info.CodecID); super(info.TrackNumber, type, type + '/' + codec); + // explicit init after super-call needed + this._frames = []; + this.lastPts = 0; + this.lastTimecodeBase = 0; + this.type = type; this.codec = codec; this.metadata = metadata; - this.lastPts = 0; this.nsPerFrame = info.DefaultDuration; - this.lastTimecodeBase = 0; this.timecodeScale = info.TrackTimecodeScale; } - private static getType(type: number): string { - switch (type) { - case 1: - return Track.TYPE_VIDEO; - - case 2: - return Track.TYPE_AUDIO; - - case 3: - return Track.TYPE_COMPLEX; - - case 0x10: - return Track.TYPE_LOGO; - - case 0x11: - return Track.TYPE_TEXT; - - case 0x12: - return Track.TYPE_BUTTONS; + get frames() { return this._frames; } - case 0x20: - return Track.TYPE_CONTROL; - - default: - return Track.TYPE_UNKNOWN; + private static getType(type: number): TrackType { + switch (type) { + case 1: + return TrackType.VIDEO; + case 2: + return TrackType.AUDIO; + case 3: + return TrackType.COMPLEX; + case 0x10: + return TrackType.LOGO; + case 0x11: + return TrackType.TEXT; + case 0x12: + return TrackType.BUTTONS; + case 0x20: + return TrackType.CONTROL; + default: + return TrackType.UNKNOWN; } } @@ -65,8 +69,12 @@ export class WebMTrack extends Track { return codecID.substr(pos + 1); } + public getResolution(): [number, number] { + throw new Error('Method not implemented.'); + } + public getFrames(): Frame[] { - return this.frames; + return this._frames; } public getMetadata(): any { // FIXME: Seems this is the only implementation and it violates the base-class return-type @@ -75,7 +83,7 @@ export class WebMTrack extends Track { if (!this.metadata) { return null; } - if (this.type === Track.TYPE_VIDEO) { + if (this.type === TrackType.VIDEO) { return { codecSize: { height: this.metadata.PixelHeight, @@ -86,7 +94,7 @@ export class WebMTrack extends Track { width: this.metadata.DisplayWidth, } }; - } else if (this.type === Track.TYPE_AUDIO) { + } else if (this.type === TrackType.AUDIO) { return { sampleRate: this.metadata.SamplingFrequency }; @@ -101,12 +109,21 @@ export class WebMTrack extends Track { const buffer: Uint8Array = element.data as Uint8Array; const timecode: number = ByteParserUtils.parseUint16(buffer, trackId.length); const flags: number = ByteParserUtils.parseUint(buffer, trackId.length + 2, 1); + this.lastPts = 1000 * ((this.lastTimecodeBase + timecode) / (this.timecodeScale > 0 ? 
this.timecodeScale : 1)); if (element.name === 'SimpleBlock' && flags & 0x80) { - this.frames.push(new Frame(Frame.IDR_FRAME, this.lastPts, buffer.length)); + this._frames.push(new Frame( + FRAME_TYPE.I, + this.lastPts, 0, 0, + buffer.length + )); } else { - this.frames.push(new Frame(Frame.P_FRAME, this.lastPts, buffer.length)); + this._frames.push(new Frame( + FRAME_TYPE.P, + this.lastPts, 0, 0, + buffer.length + )); } } } diff --git a/src/index.ts b/src/index.ts index 8f2a2e7..0d916b1 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,28 +1,17 @@ import { MpegTSDemuxer } from './demuxer/ts/mpegts-demuxer'; import { Mp4Demuxer } from './demuxer/mp4/mp4-demuxer'; import { WebMDemuxer } from './demuxer/webm/webm-demuxer'; - -import { IDemuxer, TracksHash } from './demuxer/demuxer'; -import { Track } from './demuxer/track'; -import { Frame } from './demuxer/frame'; - -import { Atom, ContainerAtom } from './demuxer/mp4/atoms/atom'; - -import { FrameRate, Size } from './codecs/video-types'; - import { WebWorker } from './utils/web-worker'; -export type MpegTSDemuxer = MpegTSDemuxer; -export type Mp4Demuxer = Mp4Demuxer; -export type WebMDemuxer = WebMDemuxer; -export type IDemuxer = IDemuxer; -export type TracksHash = TracksHash; -export type Track = Track; -export type Frame = Frame; -export type FrameRate = FrameRate; -export type Size = Size; - -export type Atom = Atom; +export { MpegTSDemuxer } from './demuxer/ts/mpegts-demuxer'; +export { Mp4Demuxer } from './demuxer/mp4/mp4-demuxer'; +export { WebMDemuxer } from './demuxer/webm/webm-demuxer'; +export { FrameRate, Size } from './codecs/video-types'; +export { IDemuxer, TracksHash } from './demuxer/demuxer'; +export { Track } from './demuxer/track'; +export { Frame } from './demuxer/frame'; +export { Atom, ContainerAtom } from './demuxer/mp4/atoms/atom'; +export { MpegTSTrack as TSTrack } from './demuxer/ts/ts-track'; export function createMpegTSDemuxer(): MpegTSDemuxer { // Q: these methods should return IDemuxer to maintain abstraction solid? 
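+    // Usage sketch (mirrors samples/main.js; assumes the demuxer exposes its `tracks`
+    // hash the way the WebM demuxer does):
+    //   const demuxer = createMpegTSDemuxer();
+    //   demuxer.append(new Uint8Array(arrayBuffer));
+    //   demuxer.end();
+    //   // ...then inspect demuxer.tracks and the frames of each track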
return new MpegTSDemuxer();
diff --git a/src/utils/bit-reader.ts b/src/utils/bit-reader.ts
index edac63f..80a6687 100644
--- a/src/utils/bit-reader.ts
+++ b/src/utils/bit-reader.ts
@@ -11,10 +11,6 @@ export class BitReader {
         this.loadWord();
     }
 
-    public destroy(): void {
-        this.buffer = null;
-    }
-
     public remainingBytes(): number {
         return this.workingBytesAvailable + this.workingBitsAvailable / 8;
     }
@@ -75,7 +71,7 @@ export class BitReader {
         return this.readBits(1) === 1;
     }
 
-    public readByte = function(): number {
+    public readByte(): number {
         return this.readBits(8);
     };
 
diff --git a/src/utils/byte-parser-utils.ts b/src/utils/byte-parser-utils.ts
index 14584f9..722d929 100644
--- a/src/utils/byte-parser-utils.ts
+++ b/src/utils/byte-parser-utils.ts
@@ -1,12 +1,5 @@
-export class Mp4SampleFlags {
-    public isLeading: number;
-    public dependsOn: number;
-    public isDependedOn: number;
-    public hasRedundancy: number;
-    public paddingValue: number;
-    public isNonSyncSample: number;
-    public degradationPriority: number;
-}
+import { Mp4SampleFlags } from '../demuxer/mp4/mp4-sample-flags';
+
 declare function escape(s: string): string;
 
 export default class ByteParserUtils {
@@ -53,10 +46,25 @@ export default class ByteParserUtils {
         return value;
     }
 
-    public static parseUint(buffer: Uint8Array, offset: number, len: number): number {
+    public static parseUint(buffer: Uint8Array, offset: number, len: number, allowOverflow: boolean = true): number {
         let value: number = 0;
-        for (let i: number = 0; i < len; i++) {
+        let safeValue: number = 0;
+        // we walk backwards (least-significant byte first) instead of iterating upwards, because otherwise the overflow check below would not work
+        for (let i: number = len - 1; i >= 0; i--) {
             value |= (buffer[offset + i] << ((len - i - 1) * 8)) >>> 0;
+            if (!allowOverflow) {
+                /**
+                 * NOTE: JS bitwise operators work on 32-bit signed integers, which leaves 31 bits for the actual magnitude.
+                 * As we OR shifted bytes into this variable, overflow is to be expected as soon as the buffer holds a number exceeding 2^31.
+                 * We detect this by keeping the last "safe" value and comparing against it (on overflow the new value is either smaller or negative).
+                 * The only thing we can do at the moment is throw an error and bail out of any use-case that actually needs such large values.
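+                 * For example (a sketch based on the check below): parsing an 8-byte big-endian value of 2^31
+                 * (00 00 00 00 80 00 00 00) already wraps the 32-bit accumulator negative and triggers the error.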
+                 * TODO: find a solution for handling large 64-bit values
+                 */
+                if (value < safeValue) {
+                    throw new Error(`Error parsing ${len}-byte unsigned integer from buffer: value overflowed/wrapped around from ${safeValue} to ${value} at byte index ${i}`);
+                }
+                safeValue = value;
+            }
         }
         return value;
     }
@@ -69,8 +77,8 @@ export default class ByteParserUtils {
         return ByteParserUtils.parseUint(buffer, offset, 4);
     }
 
-    public static parseLong64(buffer: Uint8Array, offset: number): number {
-        return ByteParserUtils.parseUint(buffer, offset, 8);
+    public static parseUint64(buffer: Uint8Array, offset: number): number {
+        return ByteParserUtils.parseUint(buffer, offset, 8, false); // do NOT allow internal value wrap-over on assumed 64-bit values
     }
 
     public static parseIsoBoxType(buffer: Uint8Array, offset: number): string {
@@ -86,18 +94,6 @@ export default class ByteParserUtils {
         return new Date(seconds * 1000 - 2082844800000);
     }
 
-    public static parseIsoBoxSampleFlags(flags: number): Mp4SampleFlags {
-        return {
-            isLeading: (flags[0] & 0x0c) >>> 2,
-            dependsOn: flags[0] & 0x03,
-            isDependedOn: (flags[1] & 0xc0) >>> 6,
-            hasRedundancy: (flags[1] & 0x30) >>> 4,
-            paddingValue: (flags[1] & 0x0e) >>> 1,
-            isNonSyncSample: flags[1] & 0x01,
-            degradationPriority: (flags[2] << 8) | flags[3]
-        };
-    }
-
     public static parseBufferToHex(buffer: Uint8Array, offset: number, end: number): string {
         let str: string = '';
         for (let i: number = offset; i < end; i++) {
diff --git a/src/utils/timescale.ts b/src/utils/timescale.ts
new file mode 100644
index 0000000..ae0fac4
--- /dev/null
+++ b/src/utils/timescale.ts
@@ -0,0 +1,12 @@
+export const MICROSECOND_TIMESCALE = 1000000;
+
+export function toMicroseconds(value: number, timescale: number): number {
+    return MICROSECOND_TIMESCALE * value / timescale;
+}
+
+export const MPEG_CLOCK_HZ = 90000;
+
+export function mpegClockTimeToSecs(time: number): number {
+    return time / MPEG_CLOCK_HZ;
+}
+
diff --git a/webpack.config.js b/webpack.config.js
index 91c3d43..7b155f2 100644
--- a/webpack.config.js
+++ b/webpack.config.js
@@ -21,6 +21,7 @@ const config = {
     // We target a UMD and name it MyLib. When including the bundle in the browser
     // it will be accessible at `window.MyLib`
     output: {
+        publicPath: '/',
         path: PATHS.bundles,
         filename: '[name].js',
         libraryTarget: 'umd',