Skip to content

Commit

Permalink
Linux: Use Vulkan/CUDA interop in NVENC encoder (#1911)
Browse files Browse the repository at this point in the history
It now does direct VkImage copy into CUDA memory instead of doing
the transfer via system memory.

Added an FFmpeg patch to fix importing RGBx Vulkan frames: RGBx needs
NumChannels = 4, but FFmpeg would use NumChannels = 1.

Co-authored-by: Rafal Kolanski <[email protected]>
  • Loading branch information
nowrep and Xaphiosis authored Dec 1, 2023
1 parent 3e4318f commit 5596cc5
Show file tree
Hide file tree
Showing 9 changed files with 138 additions and 29 deletions.
2 changes: 1 addition & 1 deletion alvr/server/cpp/platform/linux/EncodePipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ std::unique_ptr<alvr::EncodePipeline> alvr::EncodePipeline::Create(Renderer *ren
if(Settings::Instance().m_force_sw_encoding == false) {
if (vk_ctx.nvidia) {
try {
auto nvenc = std::make_unique<alvr::EncodePipelineNvEnc>(render, input_frame, vk_frame_ctx, width, height);
auto nvenc = std::make_unique<alvr::EncodePipelineNvEnc>(render, vk_ctx, input_frame, vk_frame_ctx, width, height);
Info("using NvEnc encoder");
return nvenc;
} catch (std::exception &e)
Expand Down
89 changes: 73 additions & 16 deletions alvr/server/cpp/platform/linux/EncodePipelineNvEnc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,44 @@ const char *encoder(ALVR_CODEC codec) {
throw std::runtime_error("invalid codec " + std::to_string(codec));
}

/**
 * Create a CUDA hardware frames context on top of `hw_device_ctx` and attach
 * it to the encoder context as `ctx->hw_frames_ctx`.
 *
 * The frame pool dimensions are taken from `ctx->width` / `ctx->height`, so
 * those must be set on the codec context before calling this function.
 *
 * @param ctx            Encoder context that receives the frames context.
 * @param hw_device_ctx  Device context the frames context is allocated from.
 *
 * @throws std::runtime_error  if the frames context cannot be allocated.
 * @throws alvr::AvException   if the frames context cannot be initialized or
 *                             a reference to it cannot be taken for `ctx`.
 */
void set_hwframe_ctx(AVCodecContext *ctx, AVBufferRef *hw_device_ctx)
{
    AVBufferRef *hw_frames_ref = av_hwframe_ctx_alloc(hw_device_ctx);
    if (!hw_frames_ref) {
        throw std::runtime_error("Failed to create CUDA frame context.");
    }

    auto *frames_ctx = reinterpret_cast<AVHWFramesContext *>(hw_frames_ref->data);
    frames_ctx->format = AV_PIX_FMT_CUDA;
    /**
     * The frame arrives from the hardware as AV_PIX_FMT_VULKAN, whose software
     * format is AV_PIX_FMT_BGRA. NVENC only supports BGR0, so that format is
     * forced here. This is safe because both are packed 32bpp layouts:
     *   AV_PIX_FMT_BGRA - 28  ///< packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
     *   AV_PIX_FMT_BGR0 - 123 ///< packed BGR 8:8:8, 32bpp, BGRXBGRX... X=unused/undefined
     *
     * The only effect is that the alpha channel is ignored.
     */
    frames_ctx->sw_format = AV_PIX_FMT_BGR0;
    frames_ctx->width = ctx->width;
    frames_ctx->height = ctx->height;

    const int err = av_hwframe_ctx_init(hw_frames_ref);
    if (err < 0) {
        av_buffer_unref(&hw_frames_ref);
        throw alvr::AvException("Failed to initialize CUDA frame context:", err);
    }

    // The codec context gets its own reference; the local one is always dropped.
    ctx->hw_frames_ctx = av_buffer_ref(hw_frames_ref);
    av_buffer_unref(&hw_frames_ref);

    // Previously this failure was stored in a local and silently discarded,
    // leaving the encoder to be opened without a frames context.
    if (!ctx->hw_frames_ctx) {
        throw alvr::AvException("Failed to reference CUDA frame context:", AVERROR(ENOMEM));
    }
}

} // namespace
alvr::EncodePipelineNvEnc::EncodePipelineNvEnc(Renderer *render,
VkContext &vk_ctx,
VkFrame &input_frame,
VkFrameCtx &vk_frame_ctx,
uint32_t width,
Expand All @@ -34,6 +70,11 @@ alvr::EncodePipelineNvEnc::EncodePipelineNvEnc(Renderer *render,
int err;
vk_frame = input_frame.make_av_frame(vk_frame_ctx);

err = av_hwdevice_ctx_create_derived(&hw_ctx, AV_HWDEVICE_TYPE_CUDA, vk_ctx.ctx, 0);
if (err < 0) {
throw alvr::AvException("Failed to create a CUDA device:", err);
}

const auto &settings = Settings::Instance();

auto codec_id = ALVR_CODEC(settings.m_codec);
Expand Down Expand Up @@ -93,17 +134,7 @@ alvr::EncodePipelineNvEnc::EncodePipelineNvEnc(Renderer *render,
av_opt_set_int(encoder_ctx->priv_data, "delay", 1, 0);
av_opt_set_int(encoder_ctx->priv_data, "forced-idr", 1, 0);

/**
* We will recieve a frame from HW as AV_PIX_FMT_VULKAN which will converted to AV_PIX_FMT_BGRA
* as SW format when we get it from HW.
* But NVEnc support only BGR0 format and we easy can just to force it
* Because:
* AV_PIX_FMT_BGRA - 28 ///< packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
* AV_PIX_FMT_BGR0 - 123 ///< packed BGR 8:8:8, 32bpp, BGRXBGRX... X=unused/undefined
*
* We just to ignore the alpha channel and it's done
*/
encoder_ctx->pix_fmt = AV_PIX_FMT_BGR0;
encoder_ctx->pix_fmt = AV_PIX_FMT_CUDA;
encoder_ctx->width = width;
encoder_ctx->height = height;
encoder_ctx->time_base = {1, (int)1e9};
Expand All @@ -117,6 +148,8 @@ alvr::EncodePipelineNvEnc::EncodePipelineNvEnc(Renderer *render,
params.framerate = 60.0;
SetParams(params);

set_hwframe_ctx(encoder_ctx, hw_ctx);

err = avcodec_open2(encoder_ctx, codec, NULL);
if (err < 0) {
throw alvr::AvException("Cannot open video encoder codec:", err);
Expand All @@ -131,11 +164,33 @@ alvr::EncodePipelineNvEnc::~EncodePipelineNvEnc() {
}

void alvr::EncodePipelineNvEnc::PushFrame(uint64_t targetTimestampNs, bool idr) {
r->Sync();
timestamp.cpu = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now().time_since_epoch()).count();
int err = av_hwframe_transfer_data(hw_frame, vk_frame.get(), 0);
if (err) {
throw alvr::AvException("av_hwframe_transfer_data", err);
AVVkFrame *vkf = reinterpret_cast<AVVkFrame*>(vk_frame->data[0]);
vkf->sem_value[0]++;

VkTimelineSemaphoreSubmitInfo timelineInfo = {};
timelineInfo.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
timelineInfo.signalSemaphoreValueCount = 1;
timelineInfo.pSignalSemaphoreValues = &vkf->sem_value[0];

VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;

VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.pNext = &timelineInfo;
submitInfo.waitSemaphoreCount = 1;
submitInfo.pWaitSemaphores = &r->GetOutput().semaphore;
submitInfo.pWaitDstStageMask = &waitStage;
submitInfo.signalSemaphoreCount = 1;
submitInfo.pSignalSemaphores = &vkf->sem[0];
VK_CHECK(vkQueueSubmit(r->m_queue, 1, &submitInfo, nullptr));

int err = av_hwframe_get_buffer(encoder_ctx->hw_frames_ctx, hw_frame, 0);
if (err < 0) {
throw alvr::AvException("Failed to allocate CUDA frame", err);
}
err = av_hwframe_transfer_data(hw_frame, vk_frame.get(), 0);
if (err < 0) {
throw alvr::AvException("Failed to transfer Vulkan image to CUDA frame", err);
}

hw_frame->pict_type = idr ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_NONE;
Expand All @@ -144,4 +199,6 @@ void alvr::EncodePipelineNvEnc::PushFrame(uint64_t targetTimestampNs, bool idr)
if ((err = avcodec_send_frame(encoder_ctx, hw_frame)) < 0) {
throw alvr::AvException("avcodec_send_frame failed:", err);
}

av_frame_unref(hw_frame);
}
2 changes: 1 addition & 1 deletion alvr/server/cpp/platform/linux/EncodePipelineNvEnc.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class EncodePipelineNvEnc: public EncodePipeline
{
public:
~EncodePipelineNvEnc();
EncodePipelineNvEnc(Renderer *render, VkFrame &input_frame, VkFrameCtx& vk_frame_ctx, uint32_t width, uint32_t height);
EncodePipelineNvEnc(Renderer *render, VkContext &vk_ctx, VkFrame &input_frame, VkFrameCtx& vk_frame_ctx, uint32_t width, uint32_t height);

void PushFrame(uint64_t targetTimestampNs, bool idr) override;

Expand Down
10 changes: 9 additions & 1 deletion alvr/server/cpp/platform/linux/FrameRender.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ FrameRender::FrameRender(alvr::VkContext &ctx, init_packet &init, int fds[])

Info("FrameRender: Input size %ux%u", m_width, m_height);

if (Settings::Instance().m_force_sw_encoding) {
m_handle = ExternalHandle::None;
} else if (ctx.amd || ctx.intel) {
m_handle = ExternalHandle::DmaBuf;
} else if (ctx.nvidia) {
m_handle = ExternalHandle::OpaqueFd;
}

setupCustomShaders("pre");

if (Settings::Instance().m_enableColorCorrection) {
Expand Down Expand Up @@ -51,7 +59,7 @@ FrameRender::~FrameRender()

FrameRender::Output FrameRender::CreateOutput()
{
Renderer::CreateOutput(m_width, m_height);
Renderer::CreateOutput(m_width, m_height, m_handle);
return GetOutput();
}

Expand Down
1 change: 1 addition & 0 deletions alvr/server/cpp/platform/linux/FrameRender.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class FrameRender : public Renderer

uint32_t m_width;
uint32_t m_height;
ExternalHandle m_handle = ExternalHandle::None;
ColorCorrection m_colorCorrectionConstants;
FoveationVars m_foveatedRenderingConstants;
std::vector<RenderPipeline*> m_pipelines;
Expand Down
24 changes: 15 additions & 9 deletions alvr/server/cpp/platform/linux/Renderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ void Renderer::AddPipeline(RenderPipeline *pipeline)
}
}

void Renderer::CreateOutput(uint32_t width, uint32_t height)
void Renderer::CreateOutput(uint32_t width, uint32_t height, ExternalHandle handle)
{
m_output.imageInfo = {};
m_output.imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
Expand All @@ -279,7 +279,10 @@ void Renderer::CreateOutput(uint32_t width, uint32_t height)

std::vector<VkDrmFormatModifierPropertiesEXT> modifierProps;

if (d.haveDrmModifiers) {
VkExternalMemoryImageCreateInfo extMemImageInfo = {};
extMemImageInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;

if (d.haveDrmModifiers && handle == ExternalHandle::DmaBuf) {
VkImageDrmFormatModifierListCreateInfoEXT modifierListInfo = {};
modifierListInfo.sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT;

Expand Down Expand Up @@ -335,20 +338,22 @@ void Renderer::CreateOutput(uint32_t width, uint32_t height)
modifierListInfo.drmFormatModifierCount = imageModifiers.size();
modifierListInfo.pDrmFormatModifiers = imageModifiers.data();

VkExternalMemoryImageCreateInfo extMemImageInfo = {};
extMemImageInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
extMemImageInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
modifierListInfo.pNext = &extMemImageInfo;

VK_CHECK(vkCreateImage(m_dev, &m_output.imageInfo, nullptr, &m_output.image));
} else if (d.haveDmaBuf) {
VkExternalMemoryImageCreateInfo extMemImageInfo = {};
extMemImageInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
} else if (d.haveDmaBuf && handle == ExternalHandle::DmaBuf) {
extMemImageInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
m_output.imageInfo.pNext = &extMemImageInfo;

m_output.imageInfo.tiling = VK_IMAGE_TILING_LINEAR;
VK_CHECK(vkCreateImage(m_dev, &m_output.imageInfo, nullptr, &m_output.image));
} else if (handle == ExternalHandle::OpaqueFd) {
extMemImageInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
m_output.imageInfo.pNext = &extMemImageInfo;

m_output.imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
VK_CHECK(vkCreateImage(m_dev, &m_output.imageInfo, nullptr, &m_output.image));
} else {
m_output.imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
VK_CHECK(vkCreateImage(m_dev, &m_output.imageInfo, nullptr, &m_output.image));
Expand All @@ -365,15 +370,16 @@ void Renderer::CreateOutput(uint32_t width, uint32_t height)
memoryReqsInfo.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2;
memoryReqsInfo.image = m_output.image;
vkGetImageMemoryRequirements2(m_dev, &memoryReqsInfo, &memoryReqs);
m_output.size = memoryReqs.memoryRequirements.size;

VkExportMemoryAllocateInfo memory_export_info = {};
memory_export_info.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
memory_export_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
memory_export_info.handleTypes = extMemImageInfo.handleTypes;

VkMemoryDedicatedAllocateInfo memory_dedicated_info = {};
memory_dedicated_info.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
memory_dedicated_info.image = m_output.image;
if (d.haveDmaBuf) {
if (handle != ExternalHandle::None) {
memory_dedicated_info.pNext = &memory_export_info;
}

Expand Down
8 changes: 7 additions & 1 deletion alvr/server/cpp/platform/linux/Renderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ class RenderPipeline;
class Renderer
{
public:
/// Kind of external memory handle the renderer output image is created for.
enum class ExternalHandle {
    /// Output image is created without any external memory handle type.
    None = 0,
    /// Output image uses VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT.
    DmaBuf = 1,
    /// Output image uses VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT.
    OpaqueFd = 2
};

struct Output {
VkImage image = VK_NULL_HANDLE;
VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;
Expand Down Expand Up @@ -57,7 +63,7 @@ class Renderer

void AddPipeline(RenderPipeline *pipeline);

void CreateOutput(uint32_t width, uint32_t height);
void CreateOutput(uint32_t width, uint32_t height, ExternalHandle handle);

void Render(uint32_t index, uint64_t waitValue);

Expand Down
5 changes: 5 additions & 0 deletions alvr/server/cpp/platform/linux/ffmpeg_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,13 @@ alvr::VkFrame::VkFrame(
av_vkframe->size[0] = size;
av_vkframe->layout[0] = VK_IMAGE_LAYOUT_UNDEFINED;

VkExportSemaphoreCreateInfo exportInfo = {};
exportInfo.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO;
exportInfo.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;

VkSemaphoreTypeCreateInfo timelineInfo = {};
timelineInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO;
timelineInfo.pNext = &exportInfo;
timelineInfo.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE;

VkSemaphoreCreateInfo semInfo = {};
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
From f867c4c56ee75d633db2300c0822bfa0020a056e Mon Sep 17 00:00:00 2001
From: David Rosca <[email protected]>
Date: Tue, 28 Nov 2023 14:04:20 +0100
Subject: [PATCH] lavu/hwcontext_vulkan: Fix importing RGBx frames to CUDA

RGBx formats need NumChannels = 4, but the old code would set it to 1.
---
libavutil/hwcontext_vulkan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 204b57c011..e3bd6ace9b 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -2859,7 +2859,7 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
.arrayDesc = {
.Depth = 0,
.Format = cufmt,
- .NumChannels = 1 + ((planes == 2) && i),
+ .NumChannels = desc->comp[i].step,
.Flags = 0,
},
.numLevels = 1,
--
2.43.0

0 comments on commit 5596cc5

Please sign in to comment.