From 6f29a7d6898b8e459a083210c17a9d0efc4d3024 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Tue, 24 Dec 2024 16:44:39 +0200 Subject: [PATCH 1/5] texture_cache: Stricter barriers on image upload --- .../renderer_vulkan/vk_platform.cpp | 2 +- .../texture_cache/texture_cache.cpp | 21 +++++++++++++++++++ src/video_core/texture_cache/tile_manager.cpp | 11 ---------- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index dbdabe0d97..40bcb0506d 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -1,6 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - +#pragma clang optimize off // Include the vulkan platform specific header #if defined(ANDROID) #define VK_USE_PLATFORM_ANDROID_KHR diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 897d6f67e9..03312ce6c0 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -566,7 +566,28 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule copy.bufferOffset += offset; } + const vk::BufferMemoryBarrier pre_barrier{ + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .buffer = buffer, + .offset = offset, + .size = image_size, + }; + const vk::BufferMemoryBarrier post_barrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + .buffer = buffer, + .offset = offset, + .size = image_size, + }; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, + {}, 
pre_barrier, {}); cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, + {}, post_barrier, {}); image.flags &= ~ImageFlagBits::Dirty; } diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index ef80f24189..581f74148b 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -340,17 +340,6 @@ std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o const auto bpp = image.info.num_bits * (image.info.props.is_block ? 16u : 1u); const auto num_tiles = image_size / (64 * (bpp / 8)); cmdbuf.dispatch(num_tiles, 1, 1); - - const vk::BufferMemoryBarrier post_barrier{ - .srcAccessMask = vk::AccessFlagBits::eShaderWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead, - .buffer = out_buffer.first, - .size = image_size, - }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - {}, post_barrier, {}); - return {out_buffer.first, 0}; } From e93aace99bced97fed8fae9940731b4f544e029f Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Tue, 24 Dec 2024 16:59:17 +0200 Subject: [PATCH 2/5] buffer_cache: Stricter barrier for vkCmdUpdateBuffer --- src/video_core/buffer_cache/buffer_cache.cpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 59c1e0bc3f..d2de52d0e6 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -291,7 +291,16 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo const BufferId buffer_id = 
FindBuffer(address, num_bytes); return &slot_buffers[buffer_id]; }(); - const vk::BufferMemoryBarrier2 buf_barrier = { + const vk::BufferMemoryBarrier2 pre_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer->Handle(), + .offset = buffer->Offset(address), + .size = num_bytes, + }; + const vk::BufferMemoryBarrier2 post_barrier = { .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, @@ -303,9 +312,14 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &buf_barrier, + .pBufferMemoryBarriers = &pre_barrier, + }); + cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, }); - cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value); } std::pair BufferCache::ObtainHostUBO(std::span data) { From a56b092854df6e2c1989abc54d09f717bc7c1b8d Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Tue, 24 Dec 2024 17:13:04 +0200 Subject: [PATCH 3/5] vk_rasterizer: Barrier also normal buffers and make it apply to all stages --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 55f8610131..f52237e02d 100644 --- 
a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -568,6 +568,12 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding push_data.AddOffset(binding.buffer, adjust); buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, vsharp.GetSize() + adjust); + if (auto barrier = + vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite + : vk::AccessFlagBits2::eShaderRead, + vk::PipelineStageFlagBits2::eAllCommands)) { + buffer_barriers.emplace_back(*barrier); + } } set_writes.push_back({ @@ -606,7 +612,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding if (auto barrier = vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite : vk::AccessFlagBits2::eShaderRead, - vk::PipelineStageFlagBits2::eComputeShader)) { + vk::PipelineStageFlagBits2::eAllCommands)) { buffer_barriers.emplace_back(*barrier); } if (desc.is_written) { From 33b481fdf5f75ef35e145b55a968166f3ed0603d Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Tue, 24 Dec 2024 22:43:08 +0200 Subject: [PATCH 4/5] texture_cache: Minor barrier cleanup * Batch image and buffer barriers in a single command --- src/video_core/buffer_cache/buffer_cache.cpp | 127 ++++++++++++++---- .../renderer_vulkan/vk_platform.cpp | 2 +- .../texture_cache/texture_cache.cpp | 52 ++++--- src/video_core/texture_cache/tile_manager.cpp | 48 +++---- src/video_core/texture_cache/tile_manager.h | 6 +- 5 files changed, 158 insertions(+), 77 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index d2de52d0e6..7b2268cad0 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -510,21 +510,48 @@ void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, }; scheduler.EndRendering(); const auto cmdbuf 
= scheduler.CommandBuffer(); - static constexpr vk::MemoryBarrier READ_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + const std::array pre_barriers = { + vk::BufferMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferRead, + .buffer = overlap.Handle(), + .offset = 0, + .size = overlap.SizeBytes(), + }, }; - static constexpr vk::MemoryBarrier WRITE_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + const std::array post_barriers = { + vk::BufferMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferRead, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryWrite, + .buffer = overlap.Handle(), + .offset = 0, + .size = overlap.SizeBytes(), + }, + vk::BufferMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .buffer = new_buffer.Handle(), + .offset = dst_base_offset, + .size = overlap.SizeBytes(), + }, }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - READ_BARRIER, {}, {}); - cmdbuf.copyBuffer(overlap.buffer, new_buffer.buffer, copy); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); + 
cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = pre_barriers.data(), + }); + cmdbuf.copyBuffer(overlap.Handle(), new_buffer.Handle(), copy); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = static_cast<u32>(post_barriers.size()), + .pBufferMemoryBarriers = post_barriers.data(), + }); DeleteBuffer(overlap_id); } @@ -628,21 +655,35 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, } scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); - static constexpr vk::MemoryBarrier READ_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + const vk::BufferMemoryBarrier2 pre_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer.Handle(), + .offset = 0, + .size = buffer.SizeBytes(), }; - static constexpr vk::MemoryBarrier WRITE_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + const vk::BufferMemoryBarrier2 post_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .buffer = buffer.Handle(), + .offset = 0, + .size = buffer.SizeBytes(), }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - READ_BARRIER, {}, {}); +
cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &pre_barrier, + }); cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, + }); } bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) { @@ -692,10 +733,42 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, } if (!copies.empty()) { scheduler.EndRendering(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); + const vk::BufferMemoryBarrier2 pre_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer.Handle(), + .offset = max_offset - size, + .size = size, + }; + const vk::BufferMemoryBarrier2 post_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, + .buffer = buffer.Handle(), + .offset = max_offset - size, + .size = size, + }; + auto barriers = image.GetBarriers(vk::ImageLayout::eTransferSrcOptimal, + vk::AccessFlagBits2::eTransferRead, + vk::PipelineStageFlagBits2::eTransfer, {}); const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + 
.dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &pre_barrier, + .imageMemoryBarrierCount = static_cast<u32>(barriers.size()), + .pImageMemoryBarriers = barriers.data(), + }); + cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.Handle(), copies); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, + }); } return true; } diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index 40bcb0506d..dbdabe0d97 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -1,6 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#pragma clang optimize off + // Include the vulkan platform specific header #if defined(ANDROID) #define VK_USE_PLATFORM_ANDROID_KHR diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 03312ce6c0..d9fa54d5da 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -542,52 +542,60 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule sched_ptr->EndRendering(); const auto cmdbuf = sched_ptr->CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}, - cmdbuf); - const VAddr image_addr = image.info.guest_address; const size_t image_size = image.info.guest_size_bytes; const auto [vk_buffer, buf_offset] = buffer_cache.ObtainViewBuffer(image_addr, image_size, is_gpu_dirty); - // The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW - // hazard + + // The obtained buffer may be written by a shader so we need to emit a barrier to
prevent RAW hazard if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead, vk::PipelineStageFlagBits2::eTransfer)) { - const auto dependencies = vk::DependencyInfo{ + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, .bufferMemoryBarrierCount = 1, .pBufferMemoryBarriers = &barrier.value(), - }; - cmdbuf.pipelineBarrier2(dependencies); + }); } - const auto [buffer, offset] = tile_manager.TryDetile(vk_buffer->Handle(), buf_offset, image); + const auto [buffer, offset] = tile_manager.TryDetile(vk_buffer->Handle(), buf_offset, image.info); for (auto& copy : image_copy) { copy.bufferOffset += offset; } - const vk::BufferMemoryBarrier pre_barrier{ - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead, + const vk::BufferMemoryBarrier2 pre_barrier{ + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferRead, .buffer = buffer, .offset = offset, .size = image_size, }; - const vk::BufferMemoryBarrier post_barrier{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + const vk::BufferMemoryBarrier2 post_barrier{ + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, .buffer = buffer, .offset = offset, .size = image_size, }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - {}, pre_barrier, {}); + const auto image_barriers = + image.GetBarriers(vk::ImageLayout::eTransferDstOptimal, 
vk::AccessFlagBits2::eTransferWrite, + vk::PipelineStageFlagBits2::eTransfer, {}); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &pre_barrier, + .imageMemoryBarrierCount = static_cast<u32>(image_barriers.size()), + .pImageMemoryBarriers = image_barriers.data(), + }); cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, - {}, post_barrier, {}); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, + }); image.flags &= ~ImageFlagBits::Dirty; } diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 581f74148b..4f3fafbeea 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -4,6 +4,7 @@ #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/texture_cache/image_info.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/tile_manager.h" @@ -82,10 +83,10 @@ static vk::Format DemoteImageFormatForDetiling(vk::Format format) { return format; } -const DetilerContext* TileManager::GetDetiler(const Image& image) const { - const auto format = DemoteImageFormatForDetiling(image.info.pixel_format); +const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const { + const auto format = DemoteImageFormatForDetiling(info.pixel_format); - switch (image.info.tiling_mode) { + switch (info.tiling_mode) { case AmdGpu::TilingMode::Texture_MicroTiled: switch (format) { case
vk::Format::eR8Uint: @@ -254,23 +255,23 @@ void TileManager::FreeBuffer(ScratchBuffer buffer) { } std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_offset, - Image& image) { - if (!image.info.props.is_tiled) { + const ImageInfo& info) { + if (!info.props.is_tiled) { return {in_buffer, in_offset}; } - const auto* detiler = GetDetiler(image); + const auto* detiler = GetDetiler(info); if (!detiler) { - if (image.info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled && - image.info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled && - image.info.tiling_mode != AmdGpu::TilingMode::Depth_MacroTiled) { + if (info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled && + info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled && + info.tiling_mode != AmdGpu::TilingMode::Depth_MacroTiled) { LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})", - vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode)); + vk::to_string(info.pixel_format), NameOf(info.tiling_mode)); } return {in_buffer, in_offset}; } - const u32 image_size = image.info.guest_size_bytes; + const u32 image_size = info.guest_size_bytes; // Prepare output buffer auto out_buffer = AllocBuffer(image_size, true); @@ -313,22 +314,21 @@ std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o set_writes); DetilerParams params; - params.num_levels = image.info.resources.levels; - params.pitch0 = image.info.pitch >> (image.info.props.is_block ? 2u : 0u); - params.height = image.info.size.height; - if (image.info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) { - ASSERT(image.info.resources.levels == 1); - ASSERT(image.info.num_bits >= 32); - const auto tiles_per_row = image.info.pitch / 8u; - const auto tiles_per_slice = tiles_per_row * ((image.info.size.height + 7u) / 8u); + params.num_levels = info.resources.levels; + params.pitch0 = info.pitch >> (info.props.is_block ? 
2u : 0u); + params.height = info.size.height; + if (info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) { + ASSERT(info.resources.levels == 1); + ASSERT(info.num_bits >= 32); + const auto tiles_per_row = info.pitch / 8u; + const auto tiles_per_slice = tiles_per_row * ((info.size.height + 7u) / 8u); params.sizes[0] = tiles_per_row; params.sizes[1] = tiles_per_slice; } else { - - ASSERT(image.info.resources.levels <= 14); + ASSERT(info.resources.levels <= 14); std::memset(&params.sizes, 0, sizeof(params.sizes)); - for (int m = 0; m < image.info.resources.levels; ++m) { - params.sizes[m] = image.info.mips_layout[m].size * image.info.resources.layers + + for (int m = 0; m < info.resources.levels; ++m) { + params.sizes[m] = info.mips_layout[m].size * info.resources.layers + (m > 0 ? params.sizes[m - 1] : 0); } } @@ -337,7 +337,7 @@ std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o &params); ASSERT((image_size % 64) == 0); - const auto bpp = image.info.num_bits * (image.info.props.is_block ?
16u : 1u); const auto num_tiles = image_size / (64 * (bpp / 8)); cmdbuf.dispatch(num_tiles, 1, 1); return {out_buffer.first, 0}; diff --git a/src/video_core/texture_cache/tile_manager.h b/src/video_core/texture_cache/tile_manager.h index 72860bca0b..2c7fc214e7 100644 --- a/src/video_core/texture_cache/tile_manager.h +++ b/src/video_core/texture_cache/tile_manager.h @@ -5,11 +5,11 @@ #include "common/types.h" #include "video_core/buffer_cache/buffer.h" -#include "video_core/texture_cache/image.h" namespace VideoCore { class TextureCache; +struct ImageInfo; enum DetilerType : u32 { Micro8x1, @@ -36,14 +36,14 @@ class TileManager { TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler); ~TileManager(); - std::pair TryDetile(vk::Buffer in_buffer, u32 in_offset, Image& image); + std::pair TryDetile(vk::Buffer in_buffer, u32 in_offset, const ImageInfo& info); ScratchBuffer AllocBuffer(u32 size, bool is_storage = false); void Upload(ScratchBuffer buffer, const void* data, size_t size); void FreeBuffer(ScratchBuffer buffer); private: - const DetilerContext* GetDetiler(const Image& image) const; + const DetilerContext* GetDetiler(const ImageInfo& info) const; private: const Vulkan::Instance& instance; From ed1154df37d586f5e40a6ce6930bc795be0c2328 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Tue, 24 Dec 2024 23:09:08 +0200 Subject: [PATCH 5/5] clang format --- src/video_core/texture_cache/texture_cache.cpp | 6 ++++-- src/video_core/texture_cache/tile_manager.h | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index d9fa54d5da..291e1da7ce 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -547,7 +547,8 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule const auto [vk_buffer, 
buf_offset] = buffer_cache.ObtainViewBuffer(image_addr, image_size, is_gpu_dirty); - // The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW hazard + // The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW + // hazard if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead, vk::PipelineStageFlagBits2::eTransfer)) { cmdbuf.pipelineBarrier2(vk::DependencyInfo{ @@ -557,7 +558,8 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule }); } - const auto [buffer, offset] = tile_manager.TryDetile(vk_buffer->Handle(), buf_offset, image.info); + const auto [buffer, offset] = + tile_manager.TryDetile(vk_buffer->Handle(), buf_offset, image.info); for (auto& copy : image_copy) { copy.bufferOffset += offset; } diff --git a/src/video_core/texture_cache/tile_manager.h b/src/video_core/texture_cache/tile_manager.h index 2c7fc214e7..1d731d2f24 100644 --- a/src/video_core/texture_cache/tile_manager.h +++ b/src/video_core/texture_cache/tile_manager.h @@ -36,7 +36,8 @@ class TileManager { TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler); ~TileManager(); - std::pair TryDetile(vk::Buffer in_buffer, u32 in_offset, const ImageInfo& info); + std::pair TryDetile(vk::Buffer in_buffer, u32 in_offset, + const ImageInfo& info); ScratchBuffer AllocBuffer(u32 size, bool is_storage = false); void Upload(ScratchBuffer buffer, const void* data, size_t size);