diff --git a/src/audiosource.cpp b/src/audiosource.cpp index b81b201..1349a9f 100644 --- a/src/audiosource.cpp +++ b/src/audiosource.cpp @@ -70,7 +70,7 @@ bool LWAudioDecoder::DecodeNextFrame(bool SkipOutput) { return false; } -void LWAudioDecoder::OpenFile(const std::filesystem::path &SourceFile, int Track, bool VariableFormat, int Threads, const std::map &LAVFOpts, double DrcScale) { +void LWAudioDecoder::OpenFile(const std::filesystem::path &SourceFile, int Track, int Threads, const std::map &LAVFOpts, double DrcScale) { TrackNumber = Track; AVDictionary *Dict = nullptr; @@ -134,12 +134,6 @@ void LWAudioDecoder::OpenFile(const std::filesystem::path &SourceFile, int Track } CodecContext->thread_count = Threads; - // FIXME, implement for newer ffmpeg versions - if (!VariableFormat) { - // Probably guard against mid-stream format changes - CodecContext->flags |= AV_CODEC_FLAG_DROPCHANGED; - } - if (DrcScale < 0) throw BestSourceException("Invalid drc_scale value"); @@ -151,10 +145,10 @@ void LWAudioDecoder::OpenFile(const std::filesystem::path &SourceFile, int Track throw BestSourceException("Could not open audio codec"); } -LWAudioDecoder::LWAudioDecoder(const std::filesystem::path &SourceFile, int Track, bool VariableFormat, int Threads, const std::map &LAVFOpts, double DrcScale) { +LWAudioDecoder::LWAudioDecoder(const std::filesystem::path &SourceFile, int Track, int Threads, const std::map &LAVFOpts, double DrcScale) { try { Packet = av_packet_alloc(); - OpenFile(SourceFile, Track, VariableFormat, Threads, LAVFOpts, DrcScale); + OpenFile(SourceFile, Track, Threads, LAVFOpts, DrcScale); } catch (...) 
{ Free(); throw; @@ -197,37 +191,12 @@ void LWAudioDecoder::SetFrameNumber(int64_t N, int64_t SampleNumber) { CurrentSample = SampleNumber; } -void LWAudioDecoder::GetAudioProperties(BSAudioProperties &AP) { - assert(CurrentFrame == 0); +void LWAudioDecoder::GetAudioProperties(LWAudioProperties &AP) { AP = {}; - AVFrame *PropFrame = GetNextFrame(); - assert(PropFrame); - if (!PropFrame) - return; - AP.AF.Set(PropFrame->format, CodecContext->bits_per_raw_sample); - AP.SampleRate = PropFrame->sample_rate; - AP.Channels = PropFrame->ch_layout.nb_channels; - - if (PropFrame->ch_layout.order == AV_CHANNEL_ORDER_NATIVE) { - AP.ChannelLayout = PropFrame->ch_layout.u.mask; - } else if (PropFrame->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) { - AVChannelLayout ch = {}; - av_channel_layout_default(&ch, PropFrame->ch_layout.nb_channels); - AP.ChannelLayout = ch.u.mask; - } else { - av_frame_free(&PropFrame); - throw BestSourceException("Ambisonics and custom channel orders not supported"); - } - - AP.NumSamples = (FormatContext->duration * PropFrame->sample_rate) / AV_TIME_BASE - FormatContext->streams[TrackNumber]->codecpar->initial_padding; - if (PropFrame->pts != AV_NOPTS_VALUE) - AP.StartTime = (static_cast(FormatContext->streams[TrackNumber]->time_base.num) * PropFrame->pts) / FormatContext->streams[TrackNumber]->time_base.den; - - av_frame_free(&PropFrame); - - if (AP.AF.Bits <= 0) //FIXME, can this still happen? 
- throw BestSourceException("Codec returned zero size audio"); + AP.Duration = FormatContext->streams[TrackNumber]->duration; + AP.TimeBase = FormatContext->streams[TrackNumber]->time_base; + AP.NumSamples = (FormatContext->duration * CodecContext->sample_rate) / AV_TIME_BASE - FormatContext->streams[TrackNumber]->codecpar->initial_padding; } AVFrame *LWAudioDecoder::GetNextFrame() { @@ -382,8 +351,8 @@ BestAudioFrame *BestAudioSource::Cache::GetFrame(int64_t N) { return nullptr; } -BestAudioSource::BestAudioSource(const std::filesystem::path &SourceFile, int Track, int AjustDelay, bool VariableFormat, int Threads, int CacheMode, const std::filesystem::path &CachePath, const std::map *LAVFOpts, double DrcScale, const ProgressFunction &Progress) - : Source(SourceFile), AudioTrack(Track), VariableFormat(VariableFormat), DrcScale(DrcScale), Threads(Threads) { +BestAudioSource::BestAudioSource(const std::filesystem::path &SourceFile, int Track, int AjustDelay, int Threads, int CacheMode, const std::filesystem::path &CachePath, const std::map *LAVFOpts, double DrcScale, const ProgressFunction &Progress) + : Source(SourceFile), AudioTrack(Track), DrcScale(DrcScale), Threads(Threads) { // Only make file path absolute if it exists to pass through special protocol paths std::error_code ec; if (std::filesystem::exists(SourceFile, ec)) @@ -395,7 +364,7 @@ BestAudioSource::BestAudioSource(const std::filesystem::path &SourceFile, int Tr if (CacheMode < 0 || CacheMode > 4) throw BestSourceException("CacheMode must be between 0 and 4"); - std::unique_ptr Decoder(new LWAudioDecoder(Source, AudioTrack, VariableFormat, Threads, LAVFOptions, DrcScale)); + std::unique_ptr Decoder(new LWAudioDecoder(Source, AudioTrack, Threads, LAVFOptions, DrcScale)); Decoder->GetAudioProperties(AP); AudioTrack = Decoder->GetTrack(); @@ -411,9 +380,11 @@ BestAudioSource::BestAudioSource(const std::filesystem::path &SourceFile, int Tr } } - AP.NumFrames = TrackIndex.Frames.size(); - AP.NumSamples = 
TrackIndex.Frames.back().Start + TrackIndex.Frames.back().Length; + InitializeFormatSets(); + SelectFormatSet(-1); + + // FIXME, rework delay adjustment if (AjustDelay >= -1) SampleDelay = static_cast(GetRelativeStartTime(AjustDelay) * AP.SampleRate); @@ -435,7 +406,7 @@ void BestAudioSource::SetSeekPreRoll(int64_t Frames) { } bool BestAudioSource::IndexTrack(const ProgressFunction &Progress) { - std::unique_ptr Decoder(new LWAudioDecoder(Source, AudioTrack, VariableFormat, Threads, LAVFOptions, DrcScale)); + std::unique_ptr Decoder(new LWAudioDecoder(Source, AudioTrack, Threads, LAVFOptions, DrcScale)); int64_t FileSize = Progress ? Decoder->GetSourceSize() : -1; @@ -444,9 +415,19 @@ bool BestAudioSource::IndexTrack(const ProgressFunction &Progress) { while (true) { AVFrame *F = Decoder->GetNextFrame(); if (!F) - break; + break; + + if (F->ch_layout.order == AV_CHANNEL_ORDER_NATIVE) { + TrackIndex.Frames.push_back({ F->pts, NumSamples, F->nb_samples, F->format, F->sample_rate, F->ch_layout.nb_channels, F->ch_layout.u.mask, GetHash(F) }); + } else if (F->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) { + AVChannelLayout ch = {}; + av_channel_layout_default(&ch, F->ch_layout.nb_channels); + TrackIndex.Frames.push_back({ F->pts, NumSamples, F->nb_samples, F->format, F->sample_rate, F->ch_layout.nb_channels, ch.u.mask, GetHash(F) }); + } else { + av_frame_free(&F); + throw BestSourceException("Ambisonics and custom channel orders not supported"); + } - TrackIndex.Frames.push_back({ F->pts, NumSamples, F->nb_samples, GetHash(F) }); NumSamples += F->nb_samples; av_frame_free(&F); @@ -462,6 +443,46 @@ bool BestAudioSource::IndexTrack(const ProgressFunction &Progress) { return !TrackIndex.Frames.empty(); } +void BestAudioSource::InitializeFormatSets() { + std::map, std::tuple> SeenSets; + for (const auto &Iter : TrackIndex.Frames) { + auto V = std::make_tuple(Iter.Format, Iter.SampleRate, Iter.Channels, Iter.ChannelLayout); + if (SeenSets.insert(std::make_pair(V, 
std::make_tuple(0, 0, Iter.PTS))).second) + FormatSets.push_back(FormatSet{ {}, Iter.Format, Iter.SampleRate, Iter.Channels, Iter.ChannelLayout }); + std::get<0>(SeenSets[V])++; + std::get<1>(SeenSets[V]) += Iter.Length; + } + + for (auto &Iter : FormatSets) { + auto V = std::make_tuple(Iter.Format, Iter.SampleRate, Iter.Channels, Iter.ChannelLayout); + Iter.NumFrames = std::get<0>(SeenSets[V]); + Iter.NumSamples = std::get<1>(SeenSets[V]); + if (std::get<2>(SeenSets[V]) != AV_NOPTS_VALUE) + Iter.StartTime = (static_cast(AP.TimeBase.Num) * std::get<2>(SeenSets[V])) / AP.TimeBase.Den; + Iter.AF.Set(Iter.Format, 0); // FIXME, needs exact bits + } + + DefaultFormatSet = FormatSets[0]; + DefaultFormatSet.NumFrames = TrackIndex.Frames.size(); + DefaultFormatSet.NumSamples = 0; + for (const auto &Iter : FormatSets) { + DefaultFormatSet.NumSamples += Iter.NumSamples; + + if (DefaultFormatSet.Format != Iter.Format) + DefaultFormatSet.Format = AV_SAMPLE_FMT_NONE; + + if (DefaultFormatSet.SampleRate != Iter.SampleRate) + DefaultFormatSet.SampleRate = 0; + + if (DefaultFormatSet.Channels != Iter.Channels || DefaultFormatSet.ChannelLayout != Iter.ChannelLayout) { + DefaultFormatSet.Channels = 0; + DefaultFormatSet.ChannelLayout = 0; + } + } + if (DefaultFormatSet.Format != AV_SAMPLE_FMT_NONE) + DefaultFormatSet.AF.Set(DefaultFormatSet.Format, 0); // FIXME, needs exact bits +} + double BestAudioSource::GetRelativeStartTime(int Track) const { if (Track < 0) { try { @@ -480,7 +501,7 @@ double BestAudioSource::GetRelativeStartTime(int Track) const { return AP.StartTime - VP.StartTime; } catch (BestSourceException &) { try { - std::unique_ptr Dec(new LWAudioDecoder(Source, false, Track, Threads, LAVFOptions, 0)); + std::unique_ptr Dec(new LWAudioDecoder(Source, Track, Threads, LAVFOptions, 0)); BSAudioProperties AP2; Dec->GetAudioProperties(AP2); return AP.StartTime - AP2.StartTime; @@ -495,6 +516,29 @@ const BSAudioProperties &BestAudioSource::GetAudioProperties() const { return 
AP; } +const std::vector &BestAudioSource::GetFormatSets() const { + return FormatSets; +} + +void BestAudioSource::SelectFormatSet(int Index) { + if (Index >= static_cast(FormatSets.size()) || Index < -1) + throw BestSourceException("Invalid format set"); + VariableFormat = Index; + BestAudioSource::FormatSet &SrcSet = (Index < 0) ? DefaultFormatSet : FormatSets[Index]; + + AP.AF = SrcSet.AF; + AP.Format = SrcSet.Format; + AP.SampleRate = SrcSet.SampleRate; + AP.Channels = SrcSet.Channels; + AP.ChannelLayout = SrcSet.ChannelLayout; + + AP.StartTime = SrcSet.StartTime; + + AP.NumFrames = SrcSet.NumFrames; + AP.NumSamples = SrcSet.NumSamples; +} + + // Short algorithm summary // 1. If a current decoder is close to the requested frame simply start from there // Determine if a decoder is "close" based on whether or not it is already in the optimal zone based on the existing keyframes @@ -509,6 +553,21 @@ BestAudioFrame *BestAudioSource::GetFrame(int64_t N, bool Linear) { if (N < 0 || N >= AP.NumFrames) return nullptr; + // Adjust frame number if an output format is chosen + if (VariableFormat >= 0 && FormatSets.size() > 1) { + const auto &ActiveSet = FormatSets[VariableFormat]; + int64_t UsableFrames = 0; + int64_t SourceN = N; + for (const auto &Iter : TrackIndex.Frames) { + if (Iter.Format != ActiveSet.Format || Iter.SampleRate != ActiveSet.SampleRate || Iter.Channels != ActiveSet.Channels || Iter.ChannelLayout != ActiveSet.ChannelLayout) { + N++; + } else { + if (UsableFrames++ == SourceN) + break; + } + } + } + std::unique_ptr F(FrameCache.GetFrame(N)); if (!F) F.reset(Linear ? GetFrameLinearInternal(N) : GetFrameInternal(N)); @@ -739,7 +798,7 @@ BestAudioFrame *BestAudioSource::GetFrameInternal(int64_t N) { int Index = (EmptySlot >= 0) ? 
EmptySlot : LeastRecentlyUsed; if (!Decoders[Index]) - Decoders[Index].reset(new LWAudioDecoder(Source, AudioTrack, VariableFormat, Threads, LAVFOptions, DrcScale)); + Decoders[Index].reset(new LWAudioDecoder(Source, AudioTrack, Threads, LAVFOptions, DrcScale)); DecoderLastUse[Index] = DecoderSequenceNum++; @@ -764,7 +823,7 @@ BestAudioFrame *BestAudioSource::GetFrameLinearInternal(int64_t N, int64_t SeekF // If an empty slot exists simply spawn a new decoder there or reuse the least recently used decoder slot if no free ones exist if (Index < 0) { Index = (EmptySlot >= 0) ? EmptySlot : LeastRecentlyUsed; - Decoders[Index].reset(new LWAudioDecoder(Source, AudioTrack, VariableFormat, Threads, LAVFOptions, DrcScale)); + Decoders[Index].reset(new LWAudioDecoder(Source, AudioTrack, Threads, LAVFOptions, DrcScale)); } std::unique_ptr &Decoder = Decoders[Index]; @@ -979,7 +1038,8 @@ bool BestAudioSource::FillInFramePlanar(const BestAudioFrame *Frame, int64_t Fra } void BestAudioSource::GetPackedAudio(uint8_t *Data, int64_t Start, int64_t Count) { - if (VariableFormat) + // FIXME, relax the restriction to only requiring the same format within the range if anyone complains + if (AP.Format == AV_SAMPLE_FMT_NONE || AP.Channels == 0 || AP.ChannelLayout == 0 || AP.SampleRate == 0) throw BestSourceException("GetPackedAudio() can only be used when variable format is disabled"); Start -= SampleDelay; @@ -1009,7 +1069,7 @@ void BestAudioSource::GetPackedAudio(uint8_t *Data, int64_t Start, int64_t Count } void BestAudioSource::GetPlanarAudio(uint8_t *const *const Data, int64_t Start, int64_t Count) { - if (VariableFormat) + if (AP.Format == AV_SAMPLE_FMT_NONE || AP.Channels == 0 || AP.ChannelLayout == 0 || AP.SampleRate == 0) throw BestSourceException("GetPlanarAudio() can only be used when variable format is disabled"); Start -= SampleDelay; @@ -1046,13 +1106,19 @@ void BestAudioSource::GetPlanarAudio(uint8_t *const *const Data, int64_t Start, //////////////////////////////////////// // Index read/write -typedef
std::array AudioCompArray; +typedef std::array AudioCompArray; -static AudioCompArray GetAudioCompArray(int64_t PTS, int64_t Length) { +static AudioCompArray GetAudioCompArray(int64_t PTS, int64_t Length, int Format, int SampleRate, int Channels, uint64_t ChannelLayout) { AudioCompArray Result; memcpy(Result.data(), &PTS, sizeof(PTS)); memcpy(Result.data() + sizeof(PTS), &Length, sizeof(Length)); - return Result; + memcpy(Result.data() + sizeof(PTS) + sizeof(Length), &Format, sizeof(Format)); + + memcpy(Result.data() + sizeof(PTS) + sizeof(Length) + sizeof(Format), &SampleRate, sizeof(SampleRate)); + memcpy(Result.data() + sizeof(PTS) + sizeof(Length) + sizeof(Format) + sizeof(SampleRate), &Channels, sizeof(Channels)); + memcpy(Result.data() + sizeof(PTS) + sizeof(Length) + sizeof(Format) + sizeof(SampleRate) + sizeof(Channels), &ChannelLayout, sizeof(ChannelLayout)); + + return Result; } bool BestAudioSource::WriteAudioTrackIndex(bool AbsolutePath, const std::filesystem::path &CachePath) { @@ -1062,7 +1128,6 @@ bool BestAudioSource::WriteAudioTrackIndex(bool AbsolutePath, const std::filesys WriteBSHeader(F, false); WriteInt64(F, FileSize); WriteInt(F, AudioTrack); - WriteInt(F, VariableFormat); WriteDouble(F, DrcScale); WriteInt(F, static_cast(LAVFOptions.size())); @@ -1088,7 +1153,7 @@ bool BestAudioSource::WriteAudioTrackIndex(bool AbsolutePath, const std::filesys LastPTSValue = OrigPTS; } - Dict.insert(std::make_pair(GetAudioCompArray(PTS, Iter.Length), 0)); + Dict.insert(std::make_pair(GetAudioCompArray(PTS, Iter.Length, Iter.Format, Iter.SampleRate, Iter.Channels, Iter.ChannelLayout), 0)); } // Only bother with a dictionary if it's not too big @@ -1113,7 +1178,7 @@ bool BestAudioSource::WriteAudioTrackIndex(bool AbsolutePath, const std::filesys LastPTSValue = OrigPTS; } - WriteByte(F, Dict[GetAudioCompArray(PTS, Iter.Length)]); + WriteByte(F, Dict[GetAudioCompArray(PTS, Iter.Length, Iter.Format, Iter.SampleRate, Iter.Channels, Iter.ChannelLayout)]); 
fwrite(Iter.Hash.data(), 1, Iter.Hash.size(), F.get()); } } else { @@ -1123,6 +1188,10 @@ bool BestAudioSource::WriteAudioTrackIndex(bool AbsolutePath, const std::filesys fwrite(Iter.Hash.data(), 1, Iter.Hash.size(), F.get()); WriteInt64(F, Iter.PTS); WriteInt64(F, Iter.Length); + WriteInt(F, Iter.Format); + WriteInt(F, Iter.SampleRate); + WriteInt(F, Iter.Channels); + WriteInt64(F, Iter.ChannelLayout); } } @@ -1139,8 +1208,6 @@ bool BestAudioSource::ReadAudioTrackIndex(bool AbsolutePath, const std::filesyst return false; if (!ReadCompareInt(F, AudioTrack)) return false; - if (!ReadCompareInt(F, VariableFormat)) - return false; if (!ReadCompareDouble(F, DrcScale)) return false; @@ -1166,6 +1233,10 @@ bool BestAudioSource::ReadAudioTrackIndex(bool AbsolutePath, const std::filesyst FrameInfo FI = {}; FI.PTS = ReadInt64(F); FI.Length = ReadInt64(F); + FI.Format = ReadInt(F); + FI.SampleRate = ReadInt(F); + FI.Channels = ReadInt(F); + FI.ChannelLayout = ReadInt64(F); Dict[i] = FI; } @@ -1189,6 +1260,10 @@ bool BestAudioSource::ReadAudioTrackIndex(bool AbsolutePath, const std::filesyst FI.PTS = ReadInt64(F); FI.Start = AP.NumSamples; FI.Length = ReadInt64(F); + FI.Format = ReadInt(F); + FI.SampleRate = ReadInt(F); + FI.Channels = ReadInt(F); + FI.ChannelLayout = ReadInt64(F); AP.NumSamples += FI.Length; TrackIndex.Frames.push_back(FI); } diff --git a/src/audiosource.h b/src/audiosource.h index 0f5e857..86bd162 100644 --- a/src/audiosource.h +++ b/src/audiosource.h @@ -45,13 +45,24 @@ struct BSAudioFormat { void Set(int Format, int BitsPerRawSample); }; -struct BSAudioProperties { +// int format, uint64_t ChannelLayout, int samplerate + +struct LWAudioProperties { + BSRational TimeBase; + int64_t Duration; + + int64_t NumSamples; /* estimated by decoder, may be wrong */ +}; + +struct BSAudioProperties : public LWAudioProperties { BSAudioFormat AF; + int Format; int SampleRate; int Channels; uint64_t ChannelLayout; - int64_t NumFrames; // can be -1 to signal that the 
number of frames is completely unknown - int64_t NumSamples; /* estimated by decoder, may be wrong */ + + int64_t NumFrames; + double StartTime; /* in seconds */ }; @@ -67,12 +78,12 @@ class LWAudioDecoder { AVPacket *Packet = nullptr; bool Seeked = false; - void OpenFile(const std::filesystem::path &SourceFile, int Track, bool VariableFormat, int Threads, const std::map &LAVFOpts, double DrcScale); + void OpenFile(const std::filesystem::path &SourceFile, int Track, int Threads, const std::map &LAVFOpts, double DrcScale); bool ReadPacket(); bool DecodeNextFrame(bool SkipOutput = false); void Free(); public: - LWAudioDecoder(const std::filesystem::path &SourceFile, int Track, bool VariableFormat, int Threads, const std::map &LAVFOpts, double DrcScale); // Positive track numbers are absolute. Negative track numbers mean nth audio track to simplify things. + LWAudioDecoder(const std::filesystem::path &SourceFile, int Track, int Threads, const std::map &LAVFOpts, double DrcScale); // Positive track numbers are absolute. Negative track numbers mean nth audio track to simplify things. 
~LWAudioDecoder(); [[nodiscard]] int64_t GetSourceSize() const; [[nodiscard]] int64_t GetSourcePostion() const; @@ -80,7 +91,7 @@ class LWAudioDecoder { [[nodiscard]] int64_t GetFrameNumber() const; // The frame you will get when calling GetNextFrame() [[nodiscard]] int64_t GetSamplePos() const; // The frame you will get when calling GetNextFrame() void SetFrameNumber(int64_t N, int64_t SampleNumber); // Use after seeking to update internal frame number - void GetAudioProperties(BSAudioProperties &VP); // Decodes one frame and advances the position to retrieve the full properties, only call directly after creation + void GetAudioProperties(LWAudioProperties &VP); // Fills in the stream time base, duration and estimated sample count from FormatContext/CodecContext without decoding any frames [[nodiscard]] AVFrame *GetNextFrame(); bool SkipFrames(int64_t Count); [[nodiscard]] bool HasMoreFrames() const; @@ -105,10 +116,27 @@ class BestAudioFrame { class BestAudioSource { public: + struct FormatSet { + BSAudioFormat AF = {}; + int Format; + int SampleRate; + int Channels; + uint64_t ChannelLayout; + + double StartTime = 0; + + int64_t NumFrames; // can be -1 to signal that the number of frames is completely unknown + int64_t NumSamples; + }; + struct FrameInfo { int64_t PTS; int64_t Start; int64_t Length; + int Format; + int SampleRate; + int Channels; + uint64_t ChannelLayout; std::array Hash; }; private: @@ -144,13 +172,16 @@ class BestAudioSource { AudioTrackIndex TrackIndex; Cache FrameCache; + std::vector FormatSets; + FormatSet DefaultFormatSet; + static constexpr int MaxVideoSources = 4; std::map LAVFOptions; double DrcScale; BSAudioProperties AP = {}; std::filesystem::path Source; int AudioTrack; - bool VariableFormat; + int VariableFormat = -1; int Threads; bool LinearMode = false; uint64_t DecoderSequenceNum = 0; @@ -167,6 +198,7 @@ class BestAudioSource { [[nodiscard]] BestAudioFrame *GetFrameInternal(int64_t N); [[nodiscard]] BestAudioFrame *GetFrameLinearInternal(int64_t
N, int64_t SeekFrame = -1, size_t Depth = 0, bool ForceUnseeked = false); [[nodiscard]] bool IndexTrack(const ProgressFunction &Progress = nullptr); + void InitializeFormatSets(); void ZeroFillStartPacked(uint8_t *&Data, int64_t &Start, int64_t &Count); void ZeroFillEndPacked(uint8_t *Data, int64_t Start, int64_t &Count); bool FillInFramePacked(const BestAudioFrame *Frame, int64_t FrameStartSample, uint8_t *&Data, int64_t &Start, int64_t &Count); @@ -180,12 +212,14 @@ class BestAudioSource { int64_t FirstSamplePos; }; - BestAudioSource(const std::filesystem::path &SourceFile, int Track, int AjustDelay, bool VariableFormat, int Threads, int CacheMode, const std::filesystem::path &CachePath, const std::map *LAVFOpts, double DrcScale, const ProgressFunction &Progress = nullptr); + BestAudioSource(const std::filesystem::path &SourceFile, int Track, int AjustDelay, int Threads, int CacheMode, const std::filesystem::path &CachePath, const std::map *LAVFOpts, double DrcScale, const ProgressFunction &Progress = nullptr); [[nodiscard]] int GetTrack() const; // Useful when opening nth video track to get the actual number void SetMaxCacheSize(size_t Bytes); /* default max size is 1GB */ void SetSeekPreRoll(int64_t Frames); /* the number of frames to cache before the position being fast forwarded to */ double GetRelativeStartTime(int Track) const; [[nodiscard]] const BSAudioProperties &GetAudioProperties() const; + [[nodiscard]] const std::vector &GetFormatSets() const; /* Get a listing of all the number of formats */ + void SelectFormatSet(int Index); /* Sets the output format to the specified format set, passing -1 means the default variable format will be used */ [[nodiscard]] BestAudioFrame *GetFrame(int64_t N, bool Linear = false); [[nodiscard]] FrameRange GetFrameRangeBySamples(int64_t Start, int64_t Count) const; void GetPackedAudio(uint8_t *Data, int64_t Start, int64_t Count); diff --git a/src/avisynth.cpp b/src/avisynth.cpp index 2de58a2..1111d31 100644 --- 
a/src/avisynth.cpp +++ b/src/avisynth.cpp @@ -304,7 +304,9 @@ class AvisynthAudioSource : public IClip { Opts["use_absolute_path"] = "1"; try { - A.reset(new BestAudioSource(CreateProbablyUTF8Path(Source), Track, AdjustDelay, false, Threads, CacheMode, CachePath ? CachePath : "", &Opts, DrcScale)); + A.reset(new BestAudioSource(CreateProbablyUTF8Path(Source), Track, AdjustDelay, Threads, CacheMode, CachePath ? CachePath : "", &Opts, DrcScale)); + + A->SelectFormatSet(0); const BSAudioProperties &AP = A->GetAudioProperties(); if (AP.AF.Float && AP.AF.Bits == 32) { diff --git a/src/vapoursynth.cpp b/src/vapoursynth.cpp index ce64c84..46c5243 100644 --- a/src/vapoursynth.cpp +++ b/src/vapoursynth.cpp @@ -319,7 +319,7 @@ static void VS_CC CreateBestAudioSource(const VSMap *In, VSMap *Out, void *, VSC if (ShowProgress) { auto NextUpdate = std::chrono::high_resolution_clock::now(); int LastValue = -1; - D->A.reset(new BestAudioSource(Source, Track, AdjustDelay, false, Threads, CacheMode, CachePath ? CachePath : "", &Opts, DrcScale, + D->A.reset(new BestAudioSource(Source, Track, AdjustDelay, Threads, CacheMode, CachePath ? CachePath : "", &Opts, DrcScale, [vsapi, Core, &NextUpdate, &LastValue](int Track, int64_t Cur, int64_t Total) { if (NextUpdate < std::chrono::high_resolution_clock::now()) { if (Total == INT64_MAX && Cur == Total) { @@ -337,9 +337,11 @@ static void VS_CC CreateBestAudioSource(const VSMap *In, VSMap *Out, void *, VSC })); } else { - D->A.reset(new BestAudioSource(Source, Track, AdjustDelay, false, Threads, CacheMode, CachePath ? CachePath : "", &Opts, DrcScale)); + D->A.reset(new BestAudioSource(Source, Track, AdjustDelay, Threads, CacheMode, CachePath ? CachePath : "", &Opts, DrcScale)); } + D->A->SelectFormatSet(0); + const BSAudioProperties &AP = D->A->GetAudioProperties(); D->Is8Bit = (AP.AF.Bits <= 8); if (!vsapi->queryAudioFormat(&D->AI.format, AP.AF.Float, D->Is8Bit ? 
16 : AP.AF.Bits, AP.ChannelLayout, Core)) diff --git a/src/version.h b/src/version.h index 78b6472..d72a460 100644 --- a/src/version.h +++ b/src/version.h @@ -21,7 +21,7 @@ #ifndef VERSION_H #define VERSION_H -#define BEST_SOURCE_VERSION_MAJOR 6 +#define BEST_SOURCE_VERSION_MAJOR 9 #define BEST_SOURCE_VERSION_MINOR 0 #endif \ No newline at end of file diff --git a/src/videosource.cpp b/src/videosource.cpp index 3ef9724..e2bdcc6 100644 --- a/src/videosource.cpp +++ b/src/videosource.cpp @@ -1398,10 +1398,10 @@ bool BestVideoSource::InitializeRFF() { } void BestVideoSource::InitializeFormatSets() { - std::map, std::tuple> SeenSets; + std::map, std::tuple> SeenSets; for (const auto &Iter : TrackIndex.Frames) { auto V = std::make_tuple(Iter.Format, Iter.Width, Iter.Height); - if (SeenSets.insert(std::make_pair(V, std::make_tuple(0, 0, 0, &Iter))).second) + if (SeenSets.insert(std::make_pair(V, std::make_tuple(0, 0, Iter.PTS, Iter.TFF))).second) FormatSets.push_back(FormatSet{ {}, Iter.Format, Iter.Width, Iter.Height }); std::get<0>(SeenSets[V])++; std::get<1>(SeenSets[V]) += Iter.RepeatPict + 2; @@ -1410,17 +1410,21 @@ void BestVideoSource::InitializeFormatSets() { for (auto &Iter : FormatSets) { auto V = std::make_tuple(Iter.Format, Iter.Width, Iter.Height); Iter.NumFrames = std::get<0>(SeenSets[V]); - Iter.NumRFFFrames = (std::get<1>(SeenSets[V]) + 1) / 2; - Iter.TFF = std::get<3>(SeenSets[V])->TFF; - if (std::get<3>(SeenSets[V])->PTS != AV_NOPTS_VALUE) - Iter.StartTime = (static_cast(VP.TimeBase.Num) * std::get<3>(SeenSets[V])->PTS) / VP.TimeBase.Den; + Iter.NumRFFFrames = std::get<1>(SeenSets[V]); + Iter.TFF = std::get<3>(SeenSets[V]); + if (std::get<2>(SeenSets[V]) != AV_NOPTS_VALUE) + Iter.StartTime = (static_cast(VP.TimeBase.Num) * std::get<2>(SeenSets[V])) / VP.TimeBase.Den; Iter.VF.Set(av_pix_fmt_desc_get(static_cast(Iter.Format))); } DefaultFormatSet = FormatSets[0]; DefaultFormatSet.NumFrames = TrackIndex.Frames.size(); DefaultFormatSet.NumRFFFrames = 0; - 
for (const auto &Iter : FormatSets) { + + for (auto &Iter : FormatSets) { + DefaultFormatSet.NumRFFFrames += Iter.NumRFFFrames; + Iter.NumRFFFrames = (Iter.NumRFFFrames + 1) / 2; // Can't round before adding it together + if (DefaultFormatSet.Format != Iter.Format) DefaultFormatSet.Format = AV_PIX_FMT_NONE; if (DefaultFormatSet.Width != Iter.Width || DefaultFormatSet.Height != Iter.Height) { @@ -1428,6 +1432,9 @@ void BestVideoSource::InitializeFormatSets() { DefaultFormatSet.Height = 0; } } + + DefaultFormatSet.NumRFFFrames = (DefaultFormatSet.NumRFFFrames + 1) / 2; + if (DefaultFormatSet.Format != AV_PIX_FMT_NONE) DefaultFormatSet.VF.Set(av_pix_fmt_desc_get(static_cast(DefaultFormatSet.Format))); else diff --git a/src/videosource.h b/src/videosource.h index 183597d..441ae9e 100644 --- a/src/videosource.h +++ b/src/videosource.h @@ -84,7 +84,7 @@ struct LWVideoProperties { struct BSVideoProperties : public LWVideoProperties { BSVideoFormat VF; - int Format; // fixme, needed? + int Format; int Width; int Height;