From 5669350747b53ee390d0235e52e1750211405bb0 Mon Sep 17 00:00:00 2001
From: Gustav Grusell <gustav.grusell@gmail.com>
Date: Sat, 28 Sep 2024 22:30:49 +0200
Subject: [PATCH] wip: VideoIndexer now saves pkt size

Signed-off-by: Gustav Grusell <gustav.grusell@gmail.com>
---
 include/video/VideoIndexer.hh | 25 ++++++++--
 src/video/VideoIndexer.cc     | 90 ++++++++++++++++++++++-------------
 2 files changed, 78 insertions(+), 37 deletions(-)
diff --git a/include/video/VideoIndexer.hh b/include/video/VideoIndexer.hh
index 1793ee5..0fbab97 100644
--- a/include/video/VideoIndexer.hh
+++ b/include/video/VideoIndexer.hh
@@ -13,18 +13,29 @@
 
 namespace vivictpp::video {
 
+struct IndexFrameData { // one entry per packet read while indexing
+  vivictpp::time::Time pts; // pts rescaled to vivictpp::time::TIME_BASE_Q
+  int size;                 // AVPacket::size of the packet
+  bool keyFrame;            // true when the packet has AV_PKT_FLAG_KEY set
+};
+
 class VideoIndex {
 
 private:
   std::vector<vivictpp::time::Time> keyFrames;
+  std::vector<vivictpp::video::IndexFrameData> frameDatas;
   std::vector<vivictpp::video::Thumbnail> thumbnails;
   mutable std::mutex m;
 
 private:
-  void addKeyFrame(const vivictpp::time::Time t) {
+  void addFrameData(const IndexFrameData &frameData) { // records one packet
     std::lock_guard<std::mutex> lg(m);
-    keyFrames.push_back(t);
+    this->frameDatas.push_back(frameData); // every packet is recorded
+    if (frameData.keyFrame) { // keyFrames keeps only key-frame timestamps
+      keyFrames.push_back(frameData.pts);
+    }
   }
+
   void addThumbnail(const vivictpp::video::Thumbnail &thumbnail) {
     std::lock_guard<std::mutex> lg(m);
     thumbnails.push_back(thumbnail);
@@ -48,6 +59,10 @@ public:
     std::lock_guard<std::mutex> lg(m);
     return thumbnails;
   }
+  std::vector<vivictpp::video::IndexFrameData> getFrameDatas() const {
+    std::lock_guard<std::mutex> lg(m); // the copy below is taken while m is held
+    return frameDatas; // snapshot by value; a reference would escape the lock
+  }
 };
 
 class VideoIndexer {
@@ -59,13 +74,15 @@ public:
   }
   ~VideoIndexer() { stopIndexThread(); }
   void prepareIndex(const std::string &inputFile,
-                    const std::string &formatOptions);
+                    const std::string &formatOptions,
+                    const bool generateThumbnails = true); // false: index only
 
   const std::shared_ptr<VideoIndex> getIndex() const { return index; }
 
 private:
   void prepareIndexInternal(const std::string &inputFile,
-                            const std::string &formatOptions);
+                            const std::string &formatOptions,
+                            bool generateThumbnails); // runs on indexingThread
   void stopIndexThread() {
     if (indexingThread) {
       stopIndexing = true;
diff --git a/src/video/VideoIndexer.cc b/src/video/VideoIndexer.cc
index cd6cc93..6a5da7b 100644
--- a/src/video/VideoIndexer.cc
+++ b/src/video/VideoIndexer.cc
@@ -9,16 +9,51 @@
 #include "time/Time.hh"
 #include "time/TimeUtils.hh"
 
+// Filter description scaling a frame to fit a maxThumbnailSize square box.
+static std::string thumbnailFilterStr(int maxThumbnailSize) {
+  return fmt::format(
+      "scale=w={}:h={}:force_original_aspect_ratio=decrease:flags=neighbor,"
+      "format=yuv420p",
+      maxThumbnailSize, maxThumbnailSize);
+}
+
+// Decodes single packets and scales the frames for use as thumbnails.
+class ThumbnailDecoder {
+  vivictpp::libav::Decoder decoder;
+  vivictpp::libav::VideoFilter filter;
+
+public:
+  ThumbnailDecoder(AVStream *stream, int maxThumbnailSize)
+      : decoder(stream->codecpar, vivictpp::libav::DecoderOptions()),
+        filter(stream, decoder.getCodecContext(),
+               thumbnailFilterStr(maxThumbnailSize)) {}
+
+  // Decodes packet, then a null packet (presumably to drain); filters results.
+  std::vector<vivictpp::libav::Frame> decode(AVPacket *packet) {
+    std::vector<vivictpp::libav::Frame> filtered;
+    for (AVPacket *input : {packet, static_cast<AVPacket *>(nullptr)}) {
+      for (auto decoded : decoder.handlePacket(input)) {
+        filtered.push_back(filter.filterFrame(decoded));
+      }
+    }
+    decoder.flush();
+    return filtered;
+  }
+};
+
 void vivictpp::video::VideoIndexer::prepareIndex(
-    const std::string &inputFile, const std::string &formatOptions) {
+    const std::string &inputFile, const std::string &formatOptions,
+    const bool generateThumbnails) { // forwarded to the indexing thread
   stopIndexThread();
   stopIndexing = false;
   indexingThread = std::make_unique<std::thread>(
-      &VideoIndexer::prepareIndexInternal, this, inputFile, formatOptions);
+      &VideoIndexer::prepareIndexInternal, this, inputFile, formatOptions,
+      generateThumbnails); // std::thread stores copies of these arguments
 }
 
 void vivictpp::video::VideoIndexer::prepareIndexInternal(
-    const std::string &inputFile, const std::string &formatOptions) {
+    const std::string &inputFile, const std::string &formatOptions,
+    const bool generateThumbnails) {
   int64_t t0 = vivictpp::time::relativeTimeMicros();
   vivictpp::libav::FormatHandler formatHandler(inputFile, formatOptions);
   if (formatHandler.getVideoStreams().empty()) {
@@ -26,20 +61,18 @@ void vivictpp::video::VideoIndexer::prepareIndexInternal(
     logger->warn("Indexing failed, No video streams found in input file");
   }
   index->clear();
-  formatHandler.setStreamActive(formatHandler.getVideoStreams()[0]->index);
+  std::set<int> activeStreams({formatHandler.getVideoStreams()[0]->index});
+  formatHandler.setActiveStreams(activeStreams);
 
-  vivictpp::libav::DecoderOptions decoderOptions;
-  AVStream *stream = formatHandler.getVideoStreams()[0];
-  vivictpp::libav::Decoder decoder(stream->codecpar, decoderOptions);
-  std::string filterStr = fmt::format(
-      "scale=w={}:h={}:force_original_aspect_ratio=decrease:flags=neighbor,"
-      "format=yuv420p",
-      maxThumbnailSize, maxThumbnailSize);
-  vivictpp::libav::VideoFilter filter(stream, decoder.getCodecContext(),
-                                      filterStr);
+  std::unique_ptr<ThumbnailDecoder> thumbnailDecoder;
+  if (generateThumbnails) {
+    thumbnailDecoder = std::make_unique<ThumbnailDecoder>(
+        formatHandler.getVideoStreams()[0], maxThumbnailSize);
+  }
 
   vivictpp::time::Time lastPts = vivictpp::time::NO_TIME;
   vivictpp::time::Time duration = formatHandler.formatContext->duration;
+  AVRational streamTimeBase = formatHandler.getVideoStreams()[0]->time_base;
   // Try to get around 100 thumbnails, with at least 5s interval
   vivictpp::time::Time thumbnailInterval = std::max(
       duration / maxThumbnails, vivictpp::time::seconds(minThumbnailInterval));
@@ -47,28 +80,19 @@ void vivictpp::video::VideoIndexer::prepareIndexInternal(
   while (!formatHandler.eof() && !stopIndexing) {
     AVPacket *packet = formatHandler.nextPacket();
     if (packet != nullptr) {
+      vivictpp::time::Time pts = av_rescale_q(packet->pts, streamTimeBase,
+                                              vivictpp::time::TIME_BASE_Q);
       bool keyFrame = packet->flags & AV_PKT_FLAG_KEY;
-      if (keyFrame) {
-        vivictpp::time::Time pts = av_rescale_q(packet->pts, stream->time_base,
-                                                vivictpp::time::TIME_BASE_Q);
-        index->addKeyFrame(pts);
-        if (lastPts == vivictpp::time::NO_TIME ||
-            pts - lastPts >= thumbnailInterval) {
-          lastPts = pts;
-          std::vector<vivictpp::libav::Frame> frames;
-          for (auto frame : decoder.handlePacket(packet)) {
-            frames.push_back(filter.filterFrame(frame));
-          }
-          for (auto frame : decoder.handlePacket(nullptr)) {
-            frames.push_back(filter.filterFrame(frame));
-          }
-          for (auto frame : frames) {
-            if (!frame.empty()) {
-              index->addThumbnail(Thumbnail(pts, frame));
-              break;
-            }
+      index->addFrameData({pts, packet->size, keyFrame});
+      if (keyFrame && generateThumbnails &&
+          (lastPts == vivictpp::time::NO_TIME ||
+           pts - lastPts >= thumbnailInterval)) {
+        lastPts = pts;
+        for (auto frame : thumbnailDecoder->decode(packet)) {
+          if (!frame.empty()) {
+            index->addThumbnail(Thumbnail(pts, frame));
+            break;
           }
-          decoder.flush();
         }
       }
       av_packet_unref(packet);