Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add EncasedBufferVec, an higher-performance alternative to StorageBuffer, and make GpuArrayBuffer use it. #12670

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions crates/bevy_pbr/src/render/morph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use bevy_ecs::prelude::*;
use bevy_render::{
batching::NoAutomaticBatching,
mesh::morph::{MeshMorphWeights, MAX_MORPH_WEIGHTS},
render_resource::{BufferUsages, BufferVec},
render_resource::{BufferUsages, RawBufferVec},
renderer::{RenderDevice, RenderQueue},
view::ViewVisibility,
Extract,
Expand All @@ -23,13 +23,13 @@ pub struct MorphIndices(EntityHashMap<MorphIndex>);

#[derive(Resource)]
pub struct MorphUniform {
pub buffer: BufferVec<f32>,
pub buffer: RawBufferVec<f32>,
}

impl Default for MorphUniform {
fn default() -> Self {
Self {
buffer: BufferVec::new(BufferUsages::UNIFORM),
buffer: RawBufferVec::new(BufferUsages::UNIFORM),
}
}
}
Expand All @@ -54,7 +54,7 @@ const fn can_align(step: usize, target: usize) -> bool {
const WGPU_MIN_ALIGN: usize = 256;

/// Align a [`BufferVec`] to `N` bytes by padding the end with `T::default()` values.
fn add_to_alignment<T: Pod + Default>(buffer: &mut BufferVec<T>) {
fn add_to_alignment<T: Pod + Default>(buffer: &mut RawBufferVec<T>) {
let n = WGPU_MIN_ALIGN;
let t_size = mem::size_of::<T>();
if !can_align(n, t_size) {
Expand Down
6 changes: 3 additions & 3 deletions crates/bevy_pbr/src/render/skin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use bevy_math::Mat4;
use bevy_render::{
batching::NoAutomaticBatching,
mesh::skinning::{SkinnedMesh, SkinnedMeshInverseBindposes},
render_resource::{BufferUsages, BufferVec},
render_resource::{BufferUsages, RawBufferVec},
renderer::{RenderDevice, RenderQueue},
view::ViewVisibility,
Extract,
Expand Down Expand Up @@ -36,13 +36,13 @@ pub struct SkinIndices(EntityHashMap<SkinIndex>);
// Notes on implementation: see comment on top of the `extract_skins` system.
#[derive(Resource)]
pub struct SkinUniform {
pub buffer: BufferVec<Mat4>,
pub buffer: RawBufferVec<Mat4>,
}

impl Default for SkinUniform {
fn default() -> Self {
Self {
buffer: BufferVec::new(BufferUsages::UNIFORM),
buffer: RawBufferVec::new(BufferUsages::UNIFORM),
}
}
}
Expand Down
175 changes: 169 additions & 6 deletions crates/bevy_render/src/render_resource/buffer_vec.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
use std::{iter, marker::PhantomData};

use crate::{
render_resource::Buffer,
renderer::{RenderDevice, RenderQueue},
};
use bytemuck::{cast_slice, Pod};
use wgpu::BufferUsages;
use encase::{
internal::{WriteInto, Writer},
ShaderType,
};
use wgpu::{BufferAddress, BufferUsages};

/// A structure for storing raw bytes that have already been properly formatted
/// for use by the GPU.
Expand All @@ -28,7 +34,7 @@ use wgpu::BufferUsages;
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
/// * [`BufferVec`]
/// * [`Texture`](crate::render_resource::Texture)
pub struct BufferVec<T: Pod> {
pub struct RawBufferVec<T: Pod> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't add a suggestion but the doc comment has a few mentions to BufferVec that should be renamed.

values: Vec<T>,
buffer: Option<Buffer>,
capacity: usize,
Expand All @@ -38,7 +44,7 @@ pub struct BufferVec<T: Pod> {
label_changed: bool,
}

impl<T: Pod> BufferVec<T> {
impl<T: Pod> RawBufferVec<T> {
pub const fn new(buffer_usage: BufferUsages) -> Self {
Self {
values: Vec::new(),
Expand Down Expand Up @@ -77,7 +83,7 @@ impl<T: Pod> BufferVec<T> {
index
}

pub fn append(&mut self, other: &mut BufferVec<T>) {
pub fn append(&mut self, other: &mut RawBufferVec<T>) {
self.values.append(&mut other.values);
}

Expand Down Expand Up @@ -112,7 +118,7 @@ impl<T: Pod> BufferVec<T> {
let size = self.item_size * capacity;
self.buffer = Some(device.create_buffer(&wgpu::BufferDescriptor {
label: self.label.as_deref(),
size: size as wgpu::BufferAddress,
size: size as BufferAddress,
usage: BufferUsages::COPY_DST | self.buffer_usage,
mapped_at_creation: false,
}));
Expand Down Expand Up @@ -154,9 +160,166 @@ impl<T: Pod> BufferVec<T> {
}
}

impl<T: Pod> Extend<T> for BufferVec<T> {
impl<T: Pod> Extend<T> for RawBufferVec<T> {
#[inline]
fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
self.values.extend(iter);
}
}

/// Like [`RawBufferVec`], but doesn't require that the data type `T` be
/// [`Pod`].
///
/// This is a high-performance data structure that you should use whenever
/// possible if your data is more complex than is suitable for [`RawBufferVec`].
/// The [`ShaderType`] trait from the `encase` library is used to ensure that
/// the data is correctly aligned for use by the GPU.
///
/// For performance reasons, unlike [`RawBufferVec`], this type doesn't allow
/// CPU access to the data after it's been added via [`BufferVec::push`]. If you
/// need CPU access to the data, consider another type, such as
/// [`StorageBuffer`].
pub struct BufferVec<T>
where
T: ShaderType + WriteInto,
{
data: Vec<u8>,
buffer: Option<Buffer>,
capacity: usize,
buffer_usage: BufferUsages,
label: Option<String>,
label_changed: bool,
phantom: PhantomData<T>,
}

impl<T> BufferVec<T>
where
T: ShaderType + WriteInto,
{
/// Creates a new [`BufferVec`] with the given [`BufferUsages`].
pub const fn new(buffer_usage: BufferUsages) -> Self {
Self {
data: vec![],
buffer: None,
capacity: 0,
buffer_usage,
label: None,
label_changed: false,
phantom: PhantomData,
}
}

/// Returns a handle to the buffer, if the data has been uploaded.
#[inline]
pub fn buffer(&self) -> Option<&Buffer> {
self.buffer.as_ref()
}

/// Returns the amount of space that the GPU will use before reallocating.
#[inline]
pub fn capacity(&self) -> usize {
self.capacity
}

/// Returns the number of items that have been pushed to this buffer.
#[inline]
pub fn len(&self) -> usize {
self.data.len() / u64::from(T::min_size()) as usize
}

/// Returns true if the buffer is empty.
#[inline]
pub fn is_empty(&self) -> bool {
self.data.is_empty()
}

/// Adds a new value and returns its index.
pub fn push(&mut self, value: T) -> usize {
let element_size = u64::from(T::min_size()) as usize;
let offset = self.data.len();

// TODO: Consider using unsafe code to push uninitialized, to prevent
// the zeroing. It shows up in profiles.
self.data.extend(iter::repeat(0).take(element_size));

// Take a slice of the new data for `write_into` to use. This is
// important: it hoists the bounds check up here so that the compiler
// can eliminate all the bounds checks that `write_into` will emit.
let mut dest = &mut self.data[offset..(offset + element_size)];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, nice, I never thought about doing that but it's a neat trick to know.

value.write_into(&mut Writer::new(&value, &mut dest, 0).unwrap());

offset / u64::from(T::min_size()) as usize
}

/// Changes the debugging label of the buffer.
///
/// The next time the buffer is updated (via [`reserve`]), Bevy will inform
/// the driver of the new label.
pub fn set_label(&mut self, label: Option<&str>) {
let label = label.map(str::to_string);

if label != self.label {
self.label_changed = true;
}

self.label = label;
}

/// Returns the label.
pub fn get_label(&self) -> Option<&str> {
self.label.as_deref()
}

/// Creates a [`Buffer`] on the [`RenderDevice`] with size
/// at least `std::mem::size_of::<T>() * capacity`, unless such a buffer already exists.
///
/// If a [`Buffer`] exists, but is too small, references to it will be discarded,
/// and a new [`Buffer`] will be created. Any previously created [`Buffer`]s
/// that are no longer referenced will be deleted by the [`RenderDevice`]
/// once it is done using them (typically 1-2 frames).
///
/// In addition to any [`BufferUsages`] provided when
/// the `BufferVec` was created, the buffer on the [`RenderDevice`]
/// is marked as [`BufferUsages::COPY_DST`](BufferUsages).
pub fn reserve(&mut self, capacity: usize, device: &RenderDevice) {
if capacity <= self.capacity && !self.label_changed {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just for my own curiosity, nothing needs to change. Why shouldn't we try to allocate a smaller buffer if the capacity is smaller?

return;
}

self.capacity = capacity;
let size = u64::from(T::min_size()) as usize * capacity;
self.buffer = Some(device.create_buffer(&wgpu::BufferDescriptor {
label: self.label.as_deref(),
size: size as BufferAddress,
usage: BufferUsages::COPY_DST | self.buffer_usage,
mapped_at_creation: false,
}));
self.label_changed = false;
}

/// Queues writing of data from system RAM to VRAM using the [`RenderDevice`]
/// and the provided [`RenderQueue`].
///
/// Before queuing the write, a [`reserve`](BufferVec::reserve) operation is
/// executed.
pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
if self.data.is_empty() {
return;
}

self.reserve(self.data.len() / u64::from(T::min_size()) as usize, device);

let Some(buffer) = &self.buffer else { return };
queue.write_buffer(buffer, 0, &self.data);
}

/// Reduces the length of the buffer.
pub fn truncate(&mut self, len: usize) {
self.data.truncate(u64::from(T::min_size()) as usize * len);
}

/// Removes all elements from the buffer.
pub fn clear(&mut self) {
self.data.clear();
}
}
26 changes: 14 additions & 12 deletions crates/bevy_render/src/render_resource/gpu_array_buffer.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use super::{
binding_types::{storage_buffer_read_only, uniform_buffer_sized},
BindGroupLayoutEntryBuilder, StorageBuffer,
BindGroupLayoutEntryBuilder, BufferVec,
};
use crate::{
render_resource::batched_uniform_buffer::BatchedUniformBuffer,
Expand All @@ -10,29 +10,31 @@ use bevy_ecs::{prelude::Component, system::Resource};
use encase::{private::WriteInto, ShaderSize, ShaderType};
use nonmax::NonMaxU32;
use std::marker::PhantomData;
use wgpu::BindingResource;
use wgpu::{BindingResource, BufferUsages};

/// Trait for types able to go in a [`GpuArrayBuffer`].
pub trait GpuArrayBufferable: ShaderType + ShaderSize + WriteInto + Clone {}
impl<T: ShaderType + ShaderSize + WriteInto + Clone> GpuArrayBufferable for T {}

/// Stores an array of elements to be transferred to the GPU and made accessible to shaders as a read-only array.
///
/// On platforms that support storage buffers, this is equivalent to [`StorageBuffer<Vec<T>>`].
/// Otherwise, this falls back to a dynamic offset uniform buffer with the largest
/// array of T that fits within a uniform buffer binding (within reasonable limits).
/// On platforms that support storage buffers, this is equivalent to
/// [`BufferVec<T>`]. Otherwise, this falls back to a dynamic offset
/// uniform buffer with the largest array of T that fits within a uniform buffer
/// binding (within reasonable limits).
///
/// Other options for storing GPU-accessible data are:
/// * [`StorageBuffer`]
/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer)
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
/// * [`RawBufferVec`](crate::render_resource::RawBufferVec)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`Texture`](crate::render_resource::Texture)
#[derive(Resource)]
pub enum GpuArrayBuffer<T: GpuArrayBufferable> {
Uniform(BatchedUniformBuffer<T>),
Storage(StorageBuffer<Vec<T>>),
Storage(BufferVec<T>),
}

impl<T: GpuArrayBufferable> GpuArrayBuffer<T> {
Expand All @@ -41,24 +43,22 @@ impl<T: GpuArrayBufferable> GpuArrayBuffer<T> {
if limits.max_storage_buffers_per_shader_stage == 0 {
GpuArrayBuffer::Uniform(BatchedUniformBuffer::new(&limits))
} else {
GpuArrayBuffer::Storage(StorageBuffer::default())
GpuArrayBuffer::Storage(BufferVec::new(BufferUsages::STORAGE))
}
}

pub fn clear(&mut self) {
match self {
GpuArrayBuffer::Uniform(buffer) => buffer.clear(),
GpuArrayBuffer::Storage(buffer) => buffer.get_mut().clear(),
GpuArrayBuffer::Storage(buffer) => buffer.clear(),
}
}

pub fn push(&mut self, value: T) -> GpuArrayBufferIndex<T> {
match self {
GpuArrayBuffer::Uniform(buffer) => buffer.push(value),
GpuArrayBuffer::Storage(buffer) => {
let buffer = buffer.get_mut();
let index = buffer.len() as u32;
buffer.push(value);
let index = buffer.push(value) as u32;
GpuArrayBufferIndex {
index,
dynamic_offset: None,
Expand Down Expand Up @@ -91,7 +91,9 @@ impl<T: GpuArrayBufferable> GpuArrayBuffer<T> {
pub fn binding(&self) -> Option<BindingResource> {
match self {
GpuArrayBuffer::Uniform(buffer) => buffer.binding(),
GpuArrayBuffer::Storage(buffer) => buffer.binding(),
GpuArrayBuffer::Storage(buffer) => {
buffer.buffer().map(|buffer| buffer.as_entire_binding())
}
}
}

Expand Down
4 changes: 4 additions & 0 deletions crates/bevy_render/src/render_resource/storage_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ use wgpu::{util::BufferInitDescriptor, BindingResource, BufferBinding, BufferUsa
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
/// * [`RawBufferVec`](crate::render_resource::RawBufferVec)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`Texture`](crate::render_resource::Texture)
///
Expand Down Expand Up @@ -154,6 +156,8 @@ impl<T: ShaderType + WriteInto> StorageBuffer<T> {
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
/// * [`RawBufferVec`](crate::render_resource::RawBufferVec)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`Texture`](crate::render_resource::Texture)
///
Expand Down
4 changes: 4 additions & 0 deletions crates/bevy_render/src/render_resource/uniform_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ use super::IntoBinding;
/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer)
/// * [`DynamicUniformBuffer`]
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
/// * [`RawBufferVec`](crate::render_resource::RawBufferVec)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`Texture`](crate::render_resource::Texture)
///
Expand Down Expand Up @@ -168,6 +170,8 @@ impl<'a, T: ShaderType + WriteInto> IntoBinding<'a> for &'a UniformBuffer<T> {
/// * [`UniformBuffer`]
/// * [`DynamicUniformBuffer`]
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
/// * [`RawBufferVec`](crate::render_resource::RawBufferVec)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`Texture`](crate::render_resource::Texture)
///
Expand Down
Loading