Skip to content

Commit

Permalink
WHIP: Insert SPS and PPS before IDR frames in annexb format due to h2…
Browse files Browse the repository at this point in the history
…64_mp4toannexb filter only processing MP4 ISOM format.
  • Loading branch information
winlinvip committed Jun 10, 2023
1 parent eb0d0c0 commit 981a098
Showing 1 changed file with 108 additions and 11 deletions.
119 changes: 108 additions & 11 deletions libavformat/rtcenc.c
Original file line number Diff line number Diff line change
Expand Up @@ -928,6 +928,13 @@ typedef struct RTCContext {
AVCodecParameters *audio_par;
AVCodecParameters *video_par;

/**
* The h264_mp4toannexb Bitstream Filter (BSF) only processes the MP4 ISOM
* format and leaves packets that are already in AnnexB format untouched;
* in that case the muxer must insert the SPS and PPS before each IDR frame.
*/
int h264_annexb_insert_sps_pps;

/* The ICE username and pwd fragment generated by the muxer. */
char ice_ufrag_local[9];
char ice_pwd_local[33];
Expand Down Expand Up @@ -970,7 +977,6 @@ typedef struct RTCContext {
int64_t rtc_ice_time;
int64_t rtc_dtls_time;
int64_t rtc_srtp_time;
int64_t rtc_ready_time;

/* The DTLS context. */
DTLSContext dtls_ctx;
Expand Down Expand Up @@ -1113,6 +1119,12 @@ static int parse_profile_level(AVFormatContext *s, AVCodecParameters *par)
if (par->profile != FF_PROFILE_UNKNOWN && par->level != FF_LEVEL_UNKNOWN)
return ret;

if (!par->extradata || par->extradata_size <= 0) {
par->profile = FF_PROFILE_H264_BASELINE;
par->level = 0x1e;
return ret;
}

while (1) {
r = avpriv_find_start_code(r, end, &state);
if (r >= end)
Expand All @@ -1127,7 +1139,7 @@ static int parse_profile_level(AVFormatContext *s, AVCodecParameters *par)
return ret;
}

av_log(rtc, AV_LOG_INFO, "WHIP: Parse profile=%d, level=%d from SPS\n",
av_log(rtc, AV_LOG_VERBOSE, "WHIP: Parse profile=%d, level=%d from SPS\n",
sps->profile_idc, sps->level_idc);
par->profile = sps->profile_idc;
par->level = sps->level_idc;
Expand Down Expand Up @@ -2192,7 +2204,6 @@ static int rtc_create_rtp_muxer(AVFormatContext *s)

if (rtc->state < RTC_STATE_READY)
rtc->state = RTC_STATE_READY;
rtc->rtc_ready_time = av_gettime();
av_log(rtc, AV_LOG_INFO, "WHIP: Muxer state=%d, buffer_size=%d, max_packet_size=%d, "
"elapsed=%dms(init:%d,offer:%d,answer:%d,udp:%d,ice:%d,dtls:%d,srtp:%d)\n",
rtc->state, buffer_size, max_packet_size, ELAPSED(rtc->rtc_starttime, av_gettime()),
Expand Down Expand Up @@ -2303,6 +2314,89 @@ static av_cold int rtc_init(AVFormatContext *s)
return ret;
}

/**
 * Since the h264_mp4toannexb filter only processes the MP4 ISOM format and bypasses
 * the annexb format, it is necessary to manually insert encoder metadata before each
 * IDR when dealing with annexb format packets. For instance, in the case of H.264,
 * we must insert SPS and PPS before the IDR frame.
 *
 * The packet is scanned for NAL units. If it carries an IDR slice but not both
 * an SPS and a PPS, its payload is replaced by a new buffer that starts with a
 * verbatim copy of the stream's extradata, followed by every NAL unit of the
 * original packet, each prefixed with a 3-byte start code (00 00 01).
 * NOTE(review): the extradata is copied as-is, so it is assumed to already be
 * in annexb format - confirm against the caller that enables this path.
 *
 * @param s   The muxer context; the codec parameters are looked up through
 *            s->streams[pkt->stream_index].
 * @param pkt The packet to inspect and, when required, rewrite in place.
 * @return 0 when the packet was left untouched or rewritten successfully,
 *         AVERROR(ENOMEM) if the temporary packet cannot be allocated, or the
 *         negative error code returned by av_packet_make_refcounted(),
 *         av_new_packet() or av_packet_copy_props(). On failure pkt is
 *         unreferenced.
 */
static int h264_annexb_insert_sps_pps(AVFormatContext *s, AVPacket *pkt)
{
    int ret = 0;
    AVPacket *in = NULL;
    AVCodecParameters *par;
    uint32_t nal_size = 0, out_size;
    uint8_t unit_type, sps_seen = 0, pps_seen = 0, idr_seen = 0, *out;
    const uint8_t *buf, *buf_end, *r1;

    /* Validate the packet before touching any of its fields. */
    if (!pkt || !pkt->data || pkt->size <= 0)
        return ret;

    /* Without extradata there is nothing to insert. */
    par = s->streams[pkt->stream_index]->codecpar;
    if (!par || !par->extradata || par->extradata_size <= 0)
        return ret;

    /* The output starts with the extradata; NAL unit sizes are added below. */
    out_size = par->extradata_size;

    /* Discover NALU type from packet. */
    buf_end = pkt->data + pkt->size;
    for (buf = ff_avc_find_startcode(pkt->data, buf_end); buf < buf_end; buf += nal_size) {
        /* Skip the start code: its zero bytes and the byte that follows them. */
        while (!*(buf++));
        r1 = ff_avc_find_startcode(buf, buf_end);
        if ((nal_size = r1 - buf) > 0) {
            unit_type = *buf & 0x1f;
            if (unit_type == H264_NAL_SPS) {
                sps_seen = 1;
            } else if (unit_type == H264_NAL_PPS) {
                pps_seen = 1;
            } else if (unit_type == H264_NAL_IDR_SLICE) {
                idr_seen = 1;
            }

            /* Each NAL unit is re-emitted with a 3-byte start code. */
            out_size += 3 + nal_size;
        }
    }

    /* Only rewrite packets that hold an IDR but lack the SPS or the PPS. */
    if (!idr_seen || (sps_seen && pps_seen))
        return ret;

    /* See av_bsf_send_packet */
    in = av_packet_alloc();
    if (!in)
        return AVERROR(ENOMEM);

    ret = av_packet_make_refcounted(pkt);
    if (ret < 0)
        goto fail;

    /* Keep the original payload in 'in' while 'pkt' gets a new buffer. */
    av_packet_move_ref(in, pkt);

    /* Create a new packet with sps/pps inserted. */
    ret = av_new_packet(pkt, out_size);
    if (ret < 0)
        goto fail;

    ret = av_packet_copy_props(pkt, in);
    if (ret < 0)
        goto fail;

    memcpy(pkt->data, par->extradata, par->extradata_size);
    out = pkt->data + par->extradata_size;
    buf_end = in->data + in->size;
    for (buf = ff_avc_find_startcode(in->data, buf_end); buf < buf_end; buf += nal_size) {
        /* Skip the start code: its zero bytes and the byte that follows them. */
        while (!*(buf++));
        r1 = ff_avc_find_startcode(buf, buf_end);
        if ((nal_size = r1 - buf) > 0) {
            AV_WB24(out, 0x00001);
            memcpy(out + 3, buf, nal_size);
            out += 3 + nal_size;
        }
    }

fail:
    if (ret < 0)
        av_packet_unref(pkt);
    av_packet_free(&in);

    return ret;
}

static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt)
{
int ret;
Expand All @@ -2329,6 +2423,13 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt)
goto end;
}

if (rtc->h264_annexb_insert_sps_pps && st->codecpar->codec_id == AV_CODEC_ID_H264) {
if ((ret = h264_annexb_insert_sps_pps(s, pkt)) < 0) {
av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to insert SPS/PPS before IDR\n");
goto end;
}
}

ret = ff_write_chained(rtp_ctx, 0, pkt, s, 0);
if (ret < 0) {
if (ret == AVERROR(EINVAL)) {
Expand Down Expand Up @@ -2395,9 +2496,10 @@ static int rtc_check_bitstream(AVFormatContext *s, AVStream *st, const AVPacket
extradata_isom = st->codecpar->extradata_size > 0 && st->codecpar->extradata[0] == 1;
if (pkt->size >= 5 && AV_RB32(b) != 0x0000001 && (AV_RB24(b) != 0x000001 || extradata_isom)) {
ret = ff_stream_add_bitstream_filter(st, "h264_mp4toannexb", NULL);
av_log(rtc, AV_LOG_INFO, "WHIP: Enable BSF h264_mp4toannexb, packet=[%x %x %x %x %x ...], extradata_isom=%d\n",
av_log(rtc, AV_LOG_VERBOSE, "WHIP: Enable BSF h264_mp4toannexb, packet=[%x %x %x %x %x ...], extradata_isom=%d\n",
b[0], b[1], b[2], b[3], b[4], extradata_isom);
}
} else
rtc->h264_annexb_insert_sps_pps = 1;
}

return ret;
Expand All @@ -2424,12 +2526,7 @@ const FFOutputFormat ff_rtc_muxer = {
.p.long_name = NULL_IF_CONFIG_SMALL("WHIP(WebRTC-HTTP ingestion protocol) muxer"),
.p.audio_codec = AV_CODEC_ID_OPUS,
.p.video_codec = AV_CODEC_ID_H264,
/**
* Avoid using AVFMT_GLOBALHEADER, for annexb format, it's necessary for the
* encoder to insert metadata headers (e.g., SPS/PPS for H.264) before each
* IDR frame.
*/
.p.flags = AVFMT_NOFILE,
.p.flags = AVFMT_NOFILE | AVFMT_GLOBALHEADER,
.p.priv_class = &rtc_muxer_class,
.priv_data_size = sizeof(RTCContext),
.init = rtc_init,
Expand Down

0 comments on commit 981a098

Please sign in to comment.