Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Linux: Use Vulkan/CUDA interop in NVENC encoder #1911

Merged
merged 1 commit into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion alvr/server/cpp/platform/linux/EncodePipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ std::unique_ptr<alvr::EncodePipeline> alvr::EncodePipeline::Create(Renderer *ren
if(Settings::Instance().m_force_sw_encoding == false) {
if (vk_ctx.nvidia) {
try {
auto nvenc = std::make_unique<alvr::EncodePipelineNvEnc>(render, input_frame, vk_frame_ctx, width, height);
auto nvenc = std::make_unique<alvr::EncodePipelineNvEnc>(render, vk_ctx, input_frame, vk_frame_ctx, width, height);
Info("using NvEnc encoder");
return nvenc;
} catch (std::exception &e)
Expand Down
89 changes: 73 additions & 16 deletions alvr/server/cpp/platform/linux/EncodePipelineNvEnc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,44 @@ const char *encoder(ALVR_CODEC codec) {
throw std::runtime_error("invalid codec " + std::to_string(codec));
}

/**
 * Allocate and initialize a CUDA hardware frames context on top of
 * hw_device_ctx and attach it to the encoder context as ctx->hw_frames_ctx.
 *
 * The frame pool takes its dimensions from ctx->width / ctx->height, so those
 * must be set on the codec context before this is called.
 *
 * @param ctx            encoder context that receives the new hw_frames_ctx
 * @param hw_device_ctx  device context the frame pool is allocated from
 * @throws std::runtime_error  if the frames context cannot be allocated
 * @throws alvr::AvException   if the frames context cannot be initialized or
 *                             referenced
 */
void set_hwframe_ctx(AVCodecContext *ctx, AVBufferRef *hw_device_ctx)
{
    AVBufferRef *hw_frames_ref = av_hwframe_ctx_alloc(hw_device_ctx);
    if (!hw_frames_ref) {
        throw std::runtime_error("Failed to create CUDA frame context.");
    }

    auto *frames_ctx = reinterpret_cast<AVHWFramesContext *>(hw_frames_ref->data);
    frames_ctx->format = AV_PIX_FMT_CUDA;
    /**
     * We receive the frame from the hardware as AV_PIX_FMT_VULKAN, whose
     * software format is AV_PIX_FMT_BGRA.
     * NVENC only supports the BGR0 format, and we can simply force it because
     * the two share the same memory layout:
     *   AV_PIX_FMT_BGRA -  28  ///< packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
     *   AV_PIX_FMT_BGR0 - 123  ///< packed BGR 8:8:8, 32bpp, BGRXBGRX... X=unused/undefined
     *
     * The alpha channel is just ignored.
     */
    frames_ctx->sw_format = AV_PIX_FMT_BGR0;
    frames_ctx->width = ctx->width;
    frames_ctx->height = ctx->height;

    const int err = av_hwframe_ctx_init(hw_frames_ref);
    if (err < 0) {
        av_buffer_unref(&hw_frames_ref);
        throw alvr::AvException("Failed to initialize CUDA frame context:", err);
    }

    // The codec context gets its own reference; our local one is always
    // dropped, whether or not taking the new reference succeeded.
    ctx->hw_frames_ctx = av_buffer_ref(hw_frames_ref);
    av_buffer_unref(&hw_frames_ref);

    // Report the allocation failure here instead of returning with a null
    // hw_frames_ctx and letting the encoder fail later with an unrelated error.
    if (!ctx->hw_frames_ctx) {
        throw alvr::AvException("Failed to reference CUDA frame context:", AVERROR(ENOMEM));
    }
}

} // namespace
alvr::EncodePipelineNvEnc::EncodePipelineNvEnc(Renderer *render,
VkContext &vk_ctx,
VkFrame &input_frame,
VkFrameCtx &vk_frame_ctx,
uint32_t width,
Expand All @@ -34,6 +70,11 @@ alvr::EncodePipelineNvEnc::EncodePipelineNvEnc(Renderer *render,
int err;
vk_frame = input_frame.make_av_frame(vk_frame_ctx);

err = av_hwdevice_ctx_create_derived(&hw_ctx, AV_HWDEVICE_TYPE_CUDA, vk_ctx.ctx, 0);
if (err < 0) {
throw alvr::AvException("Failed to create a CUDA device:", err);
}

const auto &settings = Settings::Instance();

auto codec_id = ALVR_CODEC(settings.m_codec);
Expand Down Expand Up @@ -93,17 +134,7 @@ alvr::EncodePipelineNvEnc::EncodePipelineNvEnc(Renderer *render,
av_opt_set_int(encoder_ctx->priv_data, "delay", 1, 0);
av_opt_set_int(encoder_ctx->priv_data, "forced-idr", 1, 0);

/**
* We will recieve a frame from HW as AV_PIX_FMT_VULKAN which will converted to AV_PIX_FMT_BGRA
* as SW format when we get it from HW.
* But NVEnc support only BGR0 format and we easy can just to force it
* Because:
* AV_PIX_FMT_BGRA - 28 ///< packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
* AV_PIX_FMT_BGR0 - 123 ///< packed BGR 8:8:8, 32bpp, BGRXBGRX... X=unused/undefined
*
* We just to ignore the alpha channel and it's done
*/
encoder_ctx->pix_fmt = AV_PIX_FMT_BGR0;
encoder_ctx->pix_fmt = AV_PIX_FMT_CUDA;
encoder_ctx->width = width;
encoder_ctx->height = height;
encoder_ctx->time_base = {1, (int)1e9};
Expand All @@ -117,6 +148,8 @@ alvr::EncodePipelineNvEnc::EncodePipelineNvEnc(Renderer *render,
params.framerate = 60.0;
SetParams(params);

set_hwframe_ctx(encoder_ctx, hw_ctx);

err = avcodec_open2(encoder_ctx, codec, NULL);
if (err < 0) {
throw alvr::AvException("Cannot open video encoder codec:", err);
Expand All @@ -131,11 +164,33 @@ alvr::EncodePipelineNvEnc::~EncodePipelineNvEnc() {
}

void alvr::EncodePipelineNvEnc::PushFrame(uint64_t targetTimestampNs, bool idr) {
r->Sync();
timestamp.cpu = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now().time_since_epoch()).count();
int err = av_hwframe_transfer_data(hw_frame, vk_frame.get(), 0);
if (err) {
throw alvr::AvException("av_hwframe_transfer_data", err);
AVVkFrame *vkf = reinterpret_cast<AVVkFrame*>(vk_frame->data[0]);
vkf->sem_value[0]++;

VkTimelineSemaphoreSubmitInfo timelineInfo = {};
timelineInfo.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
timelineInfo.signalSemaphoreValueCount = 1;
timelineInfo.pSignalSemaphoreValues = &vkf->sem_value[0];

VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;

VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.pNext = &timelineInfo;
submitInfo.waitSemaphoreCount = 1;
submitInfo.pWaitSemaphores = &r->GetOutput().semaphore;
submitInfo.pWaitDstStageMask = &waitStage;
submitInfo.signalSemaphoreCount = 1;
submitInfo.pSignalSemaphores = &vkf->sem[0];
VK_CHECK(vkQueueSubmit(r->m_queue, 1, &submitInfo, nullptr));

int err = av_hwframe_get_buffer(encoder_ctx->hw_frames_ctx, hw_frame, 0);
if (err < 0) {
throw alvr::AvException("Failed to allocate CUDA frame", err);
}
err = av_hwframe_transfer_data(hw_frame, vk_frame.get(), 0);
if (err < 0) {
throw alvr::AvException("Failed to transfer Vulkan image to CUDA frame", err);
}

hw_frame->pict_type = idr ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_NONE;
Expand All @@ -144,4 +199,6 @@ void alvr::EncodePipelineNvEnc::PushFrame(uint64_t targetTimestampNs, bool idr)
if ((err = avcodec_send_frame(encoder_ctx, hw_frame)) < 0) {
throw alvr::AvException("avcodec_send_frame failed:", err);
}

av_frame_unref(hw_frame);
}
2 changes: 1 addition & 1 deletion alvr/server/cpp/platform/linux/EncodePipelineNvEnc.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class EncodePipelineNvEnc: public EncodePipeline
{
public:
~EncodePipelineNvEnc();
EncodePipelineNvEnc(Renderer *render, VkFrame &input_frame, VkFrameCtx& vk_frame_ctx, uint32_t width, uint32_t height);
EncodePipelineNvEnc(Renderer *render, VkContext &vk_ctx, VkFrame &input_frame, VkFrameCtx& vk_frame_ctx, uint32_t width, uint32_t height);

void PushFrame(uint64_t targetTimestampNs, bool idr) override;

Expand Down
10 changes: 9 additions & 1 deletion alvr/server/cpp/platform/linux/FrameRender.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ FrameRender::FrameRender(alvr::VkContext &ctx, init_packet &init, int fds[])

Info("FrameRender: Input size %ux%u", m_width, m_height);

if (Settings::Instance().m_force_sw_encoding) {
m_handle = ExternalHandle::None;
} else if (ctx.amd || ctx.intel) {
m_handle = ExternalHandle::DmaBuf;
} else if (ctx.nvidia) {
m_handle = ExternalHandle::OpaqueFd;
}

setupCustomShaders("pre");

if (Settings::Instance().m_enableColorCorrection) {
Expand Down Expand Up @@ -51,7 +59,7 @@ FrameRender::~FrameRender()

FrameRender::Output FrameRender::CreateOutput()
{
Renderer::CreateOutput(m_width, m_height);
Renderer::CreateOutput(m_width, m_height, m_handle);
return GetOutput();
}

Expand Down
1 change: 1 addition & 0 deletions alvr/server/cpp/platform/linux/FrameRender.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class FrameRender : public Renderer

uint32_t m_width;
uint32_t m_height;
ExternalHandle m_handle = ExternalHandle::None;
ColorCorrection m_colorCorrectionConstants;
FoveationVars m_foveatedRenderingConstants;
std::vector<RenderPipeline*> m_pipelines;
Expand Down
24 changes: 15 additions & 9 deletions alvr/server/cpp/platform/linux/Renderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ void Renderer::AddPipeline(RenderPipeline *pipeline)
}
}

void Renderer::CreateOutput(uint32_t width, uint32_t height)
void Renderer::CreateOutput(uint32_t width, uint32_t height, ExternalHandle handle)
{
m_output.imageInfo = {};
m_output.imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
Expand All @@ -279,7 +279,10 @@ void Renderer::CreateOutput(uint32_t width, uint32_t height)

std::vector<VkDrmFormatModifierPropertiesEXT> modifierProps;

if (d.haveDrmModifiers) {
VkExternalMemoryImageCreateInfo extMemImageInfo = {};
extMemImageInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;

if (d.haveDrmModifiers && handle == ExternalHandle::DmaBuf) {
VkImageDrmFormatModifierListCreateInfoEXT modifierListInfo = {};
modifierListInfo.sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT;

Expand Down Expand Up @@ -335,20 +338,22 @@ void Renderer::CreateOutput(uint32_t width, uint32_t height)
modifierListInfo.drmFormatModifierCount = imageModifiers.size();
modifierListInfo.pDrmFormatModifiers = imageModifiers.data();

VkExternalMemoryImageCreateInfo extMemImageInfo = {};
extMemImageInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
extMemImageInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
modifierListInfo.pNext = &extMemImageInfo;

VK_CHECK(vkCreateImage(m_dev, &m_output.imageInfo, nullptr, &m_output.image));
} else if (d.haveDmaBuf) {
VkExternalMemoryImageCreateInfo extMemImageInfo = {};
extMemImageInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
} else if (d.haveDmaBuf && handle == ExternalHandle::DmaBuf) {
extMemImageInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
m_output.imageInfo.pNext = &extMemImageInfo;

m_output.imageInfo.tiling = VK_IMAGE_TILING_LINEAR;
VK_CHECK(vkCreateImage(m_dev, &m_output.imageInfo, nullptr, &m_output.image));
} else if (handle == ExternalHandle::OpaqueFd) {
extMemImageInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
m_output.imageInfo.pNext = &extMemImageInfo;

m_output.imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
VK_CHECK(vkCreateImage(m_dev, &m_output.imageInfo, nullptr, &m_output.image));
} else {
m_output.imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
VK_CHECK(vkCreateImage(m_dev, &m_output.imageInfo, nullptr, &m_output.image));
Expand All @@ -365,15 +370,16 @@ void Renderer::CreateOutput(uint32_t width, uint32_t height)
memoryReqsInfo.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2;
memoryReqsInfo.image = m_output.image;
vkGetImageMemoryRequirements2(m_dev, &memoryReqsInfo, &memoryReqs);
m_output.size = memoryReqs.memoryRequirements.size;

VkExportMemoryAllocateInfo memory_export_info = {};
memory_export_info.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
memory_export_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
memory_export_info.handleTypes = extMemImageInfo.handleTypes;

VkMemoryDedicatedAllocateInfo memory_dedicated_info = {};
memory_dedicated_info.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
memory_dedicated_info.image = m_output.image;
if (d.haveDmaBuf) {
if (handle != ExternalHandle::None) {
memory_dedicated_info.pNext = &memory_export_info;
}

Expand Down
8 changes: 7 additions & 1 deletion alvr/server/cpp/platform/linux/Renderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ class RenderPipeline;
class Renderer
{
public:
// Kind of external memory handle the output image is created for.
// Passed to CreateOutput() to pick the Vulkan external-memory handle type.
enum class ExternalHandle {
// Output memory is not exported (FrameRender selects this when software encoding is forced).
None,
// Export as VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT (FrameRender selects this on AMD/Intel).
DmaBuf,
// Export as VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT (FrameRender selects this on NVIDIA).
OpaqueFd
};

struct Output {
VkImage image = VK_NULL_HANDLE;
VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;
Expand Down Expand Up @@ -57,7 +63,7 @@ class Renderer

void AddPipeline(RenderPipeline *pipeline);

void CreateOutput(uint32_t width, uint32_t height);
void CreateOutput(uint32_t width, uint32_t height, ExternalHandle handle);

void Render(uint32_t index, uint64_t waitValue);

Expand Down
5 changes: 5 additions & 0 deletions alvr/server/cpp/platform/linux/ffmpeg_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,13 @@ alvr::VkFrame::VkFrame(
av_vkframe->size[0] = size;
av_vkframe->layout[0] = VK_IMAGE_LAYOUT_UNDEFINED;

VkExportSemaphoreCreateInfo exportInfo = {};
exportInfo.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO;
exportInfo.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;

VkSemaphoreTypeCreateInfo timelineInfo = {};
timelineInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO;
timelineInfo.pNext = &exportInfo;
timelineInfo.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE;

VkSemaphoreCreateInfo semInfo = {};
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
From f867c4c56ee75d633db2300c0822bfa0020a056e Mon Sep 17 00:00:00 2001
From: David Rosca <[email protected]>
Date: Tue, 28 Nov 2023 14:04:20 +0100
Subject: [PATCH] lavu/hwcontext_vulkan: Fix importing RGBx frames to CUDA

RGBx formats needs NumChannels = 4, but the old code would set it to 1.
---
libavutil/hwcontext_vulkan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 204b57c011..e3bd6ace9b 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -2859,7 +2859,7 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
.arrayDesc = {
.Depth = 0,
.Format = cufmt,
- .NumChannels = 1 + ((planes == 2) && i),
+ .NumChannels = desc->comp[i].step,
.Flags = 0,
},
.numLevels = 1,
--
2.43.0