alvr-org · Vixea · Dec 1, 2023 · Nov 28, 2023
diff --git a/alvr/server/cpp/platform/linux/EncodePipeline.cpp b/alvr/server/cpp/platform/linux/EncodePipeline.cpp
@@ -26,7 +26,7 @@ std::unique_ptr<alvr::EncodePipeline> alvr::EncodePipeline::Create(Renderer *ren
   if(Settings::Instance().m_force_sw_encoding == false) {
     if (vk_ctx.nvidia) {
       try {
-        auto nvenc = std::make_unique<alvr::EncodePipelineNvEnc>(render, input_frame, vk_frame_ctx, width, height);
+        auto nvenc = std::make_unique<alvr::EncodePipelineNvEnc>(render, vk_ctx, input_frame, vk_frame_ctx, width, height);
         Info("using NvEnc encoder");
         return nvenc;
       } catch (std::exception &e)

diff --git a/alvr/server/cpp/platform/linux/EncodePipelineNvEnc.cpp b/alvr/server/cpp/platform/linux/EncodePipelineNvEnc.cpp
@@ -21,8 +21,44 @@ const char *encoder(ALVR_CODEC codec) {
     throw std::runtime_error("invalid codec " + std::to_string(codec));
 }
 
+// Creates a CUDA hardware frames context on hw_device_ctx, sized from ctx->width/height,
+// and attaches it to ctx->hw_frames_ctx. Call after the encoder dimensions are set.
+// Throws std::runtime_error or alvr::AvException if allocation, init or referencing fails.
+void set_hwframe_ctx(AVCodecContext *ctx, AVBufferRef *hw_device_ctx)
+{
+  AVBufferRef *hw_frames_ref = av_hwframe_ctx_alloc(hw_device_ctx);
+  if (!hw_frames_ref) {
+    throw std::runtime_error("Failed to create CUDA frame context.");
+  }
+  auto *frames_ctx = reinterpret_cast<AVHWFramesContext *>(hw_frames_ref->data);
+  frames_ctx->format = AV_PIX_FMT_CUDA;
+  /**
+   * Frames arrive from the HW side as AV_PIX_FMT_VULKAN, whose SW format is AV_PIX_FMT_BGRA.
+   * NVENC only supports BGR0, so that format is forced here. Both share one memory layout:
+   * AV_PIX_FMT_BGRA - packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
+   * AV_PIX_FMT_BGR0 - packed BGR 8:8:8,    32bpp, BGRXBGRX...   X=unused/undefined
+   * so the alpha channel is simply ignored.
+   */
+  frames_ctx->sw_format = AV_PIX_FMT_BGR0;
+  frames_ctx->width = ctx->width;
+  frames_ctx->height = ctx->height;
+
+  const int err = av_hwframe_ctx_init(hw_frames_ref);
+  if (err < 0) {
+    av_buffer_unref(&hw_frames_ref);
+    throw alvr::AvException("Failed to initialize CUDA frame context:", err);
+  }
+
+  ctx->hw_frames_ctx = av_buffer_ref(hw_frames_ref);
+  av_buffer_unref(&hw_frames_ref); // the local reference is released on both paths
+  if (!ctx->hw_frames_ctx) {
+    // Previously this failure was stored in a local and silently dropped.
+    throw alvr::AvException("Failed to reference CUDA frame context:", AVERROR(ENOMEM));
+  }
+}
+
 } // namespace
 alvr::EncodePipelineNvEnc::EncodePipelineNvEnc(Renderer *render,
+                                               VkContext &vk_ctx,
                                                VkFrame &input_frame,
                                                VkFrameCtx &vk_frame_ctx,
                                                uint32_t width,
@@ -34,6 +70,11 @@ alvr::EncodePipelineNvEnc::EncodePipelineNvEnc(Renderer *render,
     int err;
     vk_frame = input_frame.make_av_frame(vk_frame_ctx);
 
+    err = av_hwdevice_ctx_create_derived(&hw_ctx, AV_HWDEVICE_TYPE_CUDA, vk_ctx.ctx, 0);
+    if (err < 0) {
+        throw alvr::AvException("Failed to create a CUDA device:", err);
+    }
+
     const auto &settings = Settings::Instance();
 
     auto codec_id = ALVR_CODEC(settings.m_codec);
@@ -93,17 +134,7 @@ alvr::EncodePipelineNvEnc::EncodePipelineNvEnc(Renderer *render,
     av_opt_set_int(encoder_ctx->priv_data, "delay", 1, 0);
     av_opt_set_int(encoder_ctx->priv_data, "forced-idr", 1, 0);
 
-    /**
-     * We will recieve a frame from HW as AV_PIX_FMT_VULKAN which will converted to AV_PIX_FMT_BGRA
-     * as SW format when we get it from HW.
-     * But NVEnc support only BGR0 format and we easy can just to force it
-     * Because:
-     * AV_PIX_FMT_BGRA - 28  ///< packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
-     * AV_PIX_FMT_BGR0 - 123 ///< packed BGR 8:8:8,    32bpp, BGRXBGRX...   X=unused/undefined
-     *
-     * We just to ignore the alpha channel and it's done
-     */
-    encoder_ctx->pix_fmt = AV_PIX_FMT_BGR0;
+    encoder_ctx->pix_fmt = AV_PIX_FMT_CUDA;
     encoder_ctx->width = width;
     encoder_ctx->height = height;
     encoder_ctx->time_base = {1, (int)1e9};
@@ -117,6 +148,8 @@ alvr::EncodePipelineNvEnc::EncodePipelineNvEnc(Renderer *render,
     params.framerate = 60.0;
     SetParams(params);
 
+    set_hwframe_ctx(encoder_ctx, hw_ctx);
+
     err = avcodec_open2(encoder_ctx, codec, NULL);
     if (err < 0) {
         throw alvr::AvException("Cannot open video encoder codec:", err);
@@ -131,11 +164,33 @@ alvr::EncodePipelineNvEnc::~EncodePipelineNvEnc() {
 }
 
 void alvr::EncodePipelineNvEnc::PushFrame(uint64_t targetTimestampNs, bool idr) {
-    r->Sync();
-    timestamp.cpu = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now().time_since_epoch()).count();
-    int err = av_hwframe_transfer_data(hw_frame, vk_frame.get(), 0);
-    if (err) {
-        throw alvr::AvException("av_hwframe_transfer_data", err);
+    AVVkFrame *vkf = reinterpret_cast<AVVkFrame*>(vk_frame->data[0]);
+    vkf->sem_value[0]++;
+
+    VkTimelineSemaphoreSubmitInfo timelineInfo = {};
+    timelineInfo.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
+    timelineInfo.signalSemaphoreValueCount = 1;
+    timelineInfo.pSignalSemaphoreValues = &vkf->sem_value[0];
+
+    VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+
+    VkSubmitInfo submitInfo = {};
+    submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+    submitInfo.pNext = &timelineInfo;
+    submitInfo.waitSemaphoreCount = 1;
+    submitInfo.pWaitSemaphores = &r->GetOutput().semaphore;
+    submitInfo.pWaitDstStageMask = &waitStage;
+    submitInfo.signalSemaphoreCount = 1;
+    submitInfo.pSignalSemaphores = &vkf->sem[0];
+    VK_CHECK(vkQueueSubmit(r->m_queue, 1, &submitInfo, nullptr));
+
+    int err = av_hwframe_get_buffer(encoder_ctx->hw_frames_ctx, hw_frame, 0);
+    if (err < 0) {
+        throw alvr::AvException("Failed to allocate CUDA frame", err);
+    }
+    err = av_hwframe_transfer_data(hw_frame, vk_frame.get(), 0);
+    if (err < 0) {
+        throw alvr::AvException("Failed to transfer Vulkan image to CUDA frame", err);
     }
 
     hw_frame->pict_type = idr ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_NONE;
@@ -144,4 +199,6 @@ void alvr::EncodePipelineNvEnc::PushFrame(uint64_t targetTimestampNs, bool idr)
     if ((err = avcodec_send_frame(encoder_ctx, hw_frame)) < 0) {
         throw alvr::AvException("avcodec_send_frame failed:", err);
     }
+
+    av_frame_unref(hw_frame);
 }
diff --git a/alvr/server/cpp/platform/linux/EncodePipelineNvEnc.h b/alvr/server/cpp/platform/linux/EncodePipelineNvEnc.h
@@ -16,7 +16,7 @@ class EncodePipelineNvEnc: public EncodePipeline
 {
 public:
   ~EncodePipelineNvEnc();
-  EncodePipelineNvEnc(Renderer *render, VkFrame &input_frame, VkFrameCtx& vk_frame_ctx, uint32_t width, uint32_t height);
+  EncodePipelineNvEnc(Renderer *render, VkContext &vk_ctx, VkFrame &input_frame, VkFrameCtx& vk_frame_ctx, uint32_t width, uint32_t height);
 
   void PushFrame(uint64_t targetTimestampNs, bool idr) override;
 

diff --git a/alvr/server/cpp/platform/linux/FrameRender.cpp b/alvr/server/cpp/platform/linux/FrameRender.cpp
@@ -20,6 +20,14 @@ FrameRender::FrameRender(alvr::VkContext &ctx, init_packet &init, int fds[])
 
     Info("FrameRender: Input size %ux%u", m_width, m_height);
 
+    if (Settings::Instance().m_force_sw_encoding) {
+        m_handle = ExternalHandle::None;
+    } else if (ctx.amd || ctx.intel) {
+        m_handle = ExternalHandle::DmaBuf;
+    } else if (ctx.nvidia) {
+        m_handle = ExternalHandle::OpaqueFd;
+    }
+
     setupCustomShaders("pre");
 
     if (Settings::Instance().m_enableColorCorrection) {
@@ -51,7 +59,7 @@ FrameRender::~FrameRender()
 
 FrameRender::Output FrameRender::CreateOutput()
 {
-    Renderer::CreateOutput(m_width, m_height);
+    Renderer::CreateOutput(m_width, m_height, m_handle); // m_handle is picked in the constructor (sw encoding / GPU vendor)
     return GetOutput();
 }
 

diff --git a/alvr/server/cpp/platform/linux/FrameRender.h b/alvr/server/cpp/platform/linux/FrameRender.h
@@ -42,6 +42,7 @@ class FrameRender : public Renderer
 
     uint32_t m_width;
     uint32_t m_height;
+    ExternalHandle m_handle = ExternalHandle::None;
     ColorCorrection m_colorCorrectionConstants;
     FoveationVars m_foveatedRenderingConstants;
     std::vector<RenderPipeline*> m_pipelines;

diff --git a/alvr/server/cpp/platform/linux/Renderer.cpp b/alvr/server/cpp/platform/linux/Renderer.cpp
@@ -261,7 +261,7 @@ void Renderer::AddPipeline(RenderPipeline *pipeline)
     }
 }
 
-void Renderer::CreateOutput(uint32_t width, uint32_t height)
+void Renderer::CreateOutput(uint32_t width, uint32_t height, ExternalHandle handle)
 {
     m_output.imageInfo = {};
     m_output.imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
@@ -279,7 +279,10 @@ void Renderer::CreateOutput(uint32_t width, uint32_t height)
 
     std::vector<VkDrmFormatModifierPropertiesEXT> modifierProps;
 
-    if (d.haveDrmModifiers) {
+    VkExternalMemoryImageCreateInfo extMemImageInfo = {};
+    extMemImageInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
+
+    if (d.haveDrmModifiers && handle == ExternalHandle::DmaBuf) {
         VkImageDrmFormatModifierListCreateInfoEXT modifierListInfo = {};
         modifierListInfo.sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT;
 
@@ -335,20 +338,22 @@ void Renderer::CreateOutput(uint32_t width, uint32_t height)
         modifierListInfo.drmFormatModifierCount = imageModifiers.size();
         modifierListInfo.pDrmFormatModifiers = imageModifiers.data();
 
-        VkExternalMemoryImageCreateInfo extMemImageInfo = {};
-        extMemImageInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
         extMemImageInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
         modifierListInfo.pNext = &extMemImageInfo;
 
         VK_CHECK(vkCreateImage(m_dev, &m_output.imageInfo, nullptr, &m_output.image));
-    } else if (d.haveDmaBuf) {
-        VkExternalMemoryImageCreateInfo extMemImageInfo = {};
-        extMemImageInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
+    } else if (d.haveDmaBuf && handle == ExternalHandle::DmaBuf) {
         extMemImageInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
         m_output.imageInfo.pNext = &extMemImageInfo;
 
         m_output.imageInfo.tiling = VK_IMAGE_TILING_LINEAR;
         VK_CHECK(vkCreateImage(m_dev, &m_output.imageInfo, nullptr, &m_output.image));
+    } else if (handle == ExternalHandle::OpaqueFd) {
+        extMemImageInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
+        m_output.imageInfo.pNext = &extMemImageInfo;
+
+        m_output.imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+        VK_CHECK(vkCreateImage(m_dev, &m_output.imageInfo, nullptr, &m_output.image));
     } else {
         m_output.imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
         VK_CHECK(vkCreateImage(m_dev, &m_output.imageInfo, nullptr, &m_output.image));
@@ -365,15 +370,16 @@ void Renderer::CreateOutput(uint32_t width, uint32_t height)
     memoryReqsInfo.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2;
     memoryReqsInfo.image = m_output.image;
     vkGetImageMemoryRequirements2(m_dev, &memoryReqsInfo, &memoryReqs);
+    m_output.size = memoryReqs.memoryRequirements.size;
 
     VkExportMemoryAllocateInfo memory_export_info = {};
     memory_export_info.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
-    memory_export_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
+    memory_export_info.handleTypes = extMemImageInfo.handleTypes;
 
     VkMemoryDedicatedAllocateInfo memory_dedicated_info = {};
     memory_dedicated_info.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
     memory_dedicated_info.image = m_output.image;
-    if (d.haveDmaBuf) {
+    if (handle != ExternalHandle::None) {
         memory_dedicated_info.pNext = &memory_export_info;
     }
 

diff --git a/alvr/server/cpp/platform/linux/Renderer.h b/alvr/server/cpp/platform/linux/Renderer.h
@@ -29,6 +29,12 @@ class RenderPipeline;
 class Renderer
 {
 public:
+    enum class ExternalHandle { // external memory handle type requested from CreateOutput
+        None,     // no export info is chained to the allocation; FrameRender uses this for software encoding
+        DmaBuf,   // VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; FrameRender selects it for AMD/Intel
+        OpaqueFd  // VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; FrameRender selects it for NVIDIA
+    };
+
     struct Output {
         VkImage image = VK_NULL_HANDLE;
         VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;
@@ -57,7 +63,7 @@ class Renderer
 
     void AddPipeline(RenderPipeline *pipeline);
 
-    void CreateOutput(uint32_t width, uint32_t height);
+    void CreateOutput(uint32_t width, uint32_t height, ExternalHandle handle);
 
     void Render(uint32_t index, uint64_t waitValue);
 

diff --git a/alvr/server/cpp/platform/linux/ffmpeg_helper.cpp b/alvr/server/cpp/platform/linux/ffmpeg_helper.cpp
@@ -310,8 +310,13 @@ alvr::VkFrame::VkFrame(
   av_vkframe->size[0] = size;
   av_vkframe->layout[0] = VK_IMAGE_LAYOUT_UNDEFINED;
 
+  VkExportSemaphoreCreateInfo exportInfo = {};
+  exportInfo.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO;
+  exportInfo.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
+
   VkSemaphoreTypeCreateInfo timelineInfo = {};
   timelineInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO;
+  timelineInfo.pNext = &exportInfo;
   timelineInfo.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE;
 
   VkSemaphoreCreateInfo semInfo = {};

diff --git a/alvr/xtask/patches/0001-lavu-hwcontext_vulkan-Fix-importing-RGBx-frames-to-C.patch b/alvr/xtask/patches/0001-lavu-hwcontext_vulkan-Fix-importing-RGBx-frames-to-C.patch
@@ -0,0 +1,26 @@
+From f867c4c56ee75d633db2300c0822bfa0020a056e Mon Sep 17 00:00:00 2001
+From: David Rosca <[email protected]>
+Date: Tue, 28 Nov 2023 14:04:20 +0100
+Subject: [PATCH] lavu/hwcontext_vulkan: Fix importing RGBx frames to CUDA
+
+RGBx formats needs NumChannels = 4, but the old code would set it to 1.
+---
+ libavutil/hwcontext_vulkan.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
+index 204b57c011..e3bd6ace9b 100644
+--- a/libavutil/hwcontext_vulkan.c
++++ b/libavutil/hwcontext_vulkan.c
+@@ -2859,7 +2859,7 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
+                 .arrayDesc = {
+                     .Depth = 0,
+                     .Format = cufmt,
+-                    .NumChannels = 1 + ((planes == 2) && i),
++                    .NumChannels = desc->comp[i].step,
+                     .Flags = 0,
+                 },
+                 .numLevels = 1,
+-- 
+2.43.0
+