diff --git a/CHANGELOG.md b/CHANGELOG.md
index a39cd68f8d..ea27ad58fd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -174,6 +174,9 @@ By @ErichDonGubler in [#6456](https://github.com/gfx-rs/wgpu/pull/6456), [#6148]
- Replace `usage: Range`, for `BufferUses`, `TextureUses`, and `AccelerationStructureBarrier` with a new `StateTransition`. By @atlv24 in [#6703](https://github.com/gfx-rs/wgpu/pull/6703)
- Change the `DropCallback` API to use `FnOnce` instead of `FnMut`. By @jerzywilczek in [#6482](https://github.com/gfx-rs/wgpu/pull/6482)
+- Mutable bind groups are now supported in the DX12 and Vulkan backends of `wgpu-hal`. When creating a bind group with `create_bind_group`, set the `ALLOW_UPDATES` flag to allow updating it after creation; the bind group can then be updated in place with `update_bind_group`, which mirrors the creation call.
+- DX12 now supports partially bound bind groups, allowing entries to be omitted. Omitted entries are simply not written when calling `create_bind_group`.
+- Added `array_element_offset` to `BindGroupEntry`, allowing more flexible creation and updating of mutable bind groups for bindless resources.
### Bug Fixes
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index a609edefca..cf27619cca 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -2330,6 +2330,7 @@ impl Device {
hal_entries.push(hal::BindGroupEntry {
binding,
resource_index: res_index as u32,
+ array_element_offset: None,
count: count as u32,
});
}
@@ -2344,6 +2345,7 @@ impl Device {
}
let hal_desc = hal::BindGroupDescriptor {
label: desc.label.to_hal(self.instance_flags),
+ flags: hal::BindGroupFlags::empty(),
layout: layout.raw(),
entries: &hal_entries,
buffers: &hal_buffers,
diff --git a/wgpu-core/src/indirect_validation.rs b/wgpu-core/src/indirect_validation.rs
index 3045965435..924fe86cf2 100644
--- a/wgpu-core/src/indirect_validation.rs
+++ b/wgpu-core/src/indirect_validation.rs
@@ -234,10 +234,12 @@ impl IndirectValidation {
let dst_bind_group_desc = hal::BindGroupDescriptor {
label: None,
+ flags: hal::BindGroupFlags::empty(),
layout: dst_bind_group_layout.as_ref(),
entries: &[hal::BindGroupEntry {
binding: 0,
resource_index: 0,
+ array_element_offset: None,
count: 1,
}],
buffers: &[hal::BufferBinding {
@@ -280,10 +282,12 @@
};
let hal_desc = hal::BindGroupDescriptor {
label: None,
+ flags: hal::BindGroupFlags::empty(),
layout: self.src_bind_group_layout.as_ref(),
entries: &[hal::BindGroupEntry {
binding: 0,
resource_index: 0,
+ array_element_offset: None,
count: 1,
}],
buffers: &[hal::BufferBinding {
diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs
index 1418e67191..cdfdc62ca0 100644
--- a/wgpu-hal/examples/halmark/main.rs
+++ b/wgpu-hal/examples/halmark/main.rs
@@ -25,6 +25,8 @@ const BUNNY_SIZE: f32 = 0.15 * 256.0;
const GRAVITY: f32 = -9.8 * 100.0;
const MAX_VELOCITY: f32 = 750.0;
const DESIRED_MAX_LATENCY: u32 = 2;
+const MUTABLE_BIND_GROUP: bool = true;
+const MUTABLE_BIND_GROUP_UPDATE_FRAMES: usize = 100;
#[repr(C)]
#[derive(Clone, Copy)]
@@ -71,7 +73,7 @@ struct Example {
surface_format: wgt::TextureFormat,
device: A::Device,
queue: A::Queue,
- global_group: A::BindGroup,
+ global_group: [A::BindGroup; 2],
local_group: A::BindGroup,
global_group_layout: A::BindGroupLayout,
local_group_layout: A::BindGroupLayout,
@@ -83,8 +85,7 @@ struct Example {
local_alignment: u32,
global_buffer: A::Buffer,
sampler: A::Sampler,
- texture: A::Texture,
- texture_view: 
A::TextureView, + textures: Vec<(A::Texture, A::TextureView)>, contexts: Vec>, context_index: usize, extent: [u32; 2], @@ -187,7 +188,12 @@ impl Example { let global_bgl_desc = hal::BindGroupLayoutDescriptor { label: None, - flags: hal::BindGroupLayoutFlags::empty(), + flags: if MUTABLE_BIND_GROUP { + hal::BindGroupLayoutFlags::UPDATE_AFTER_BIND + | hal::BindGroupLayoutFlags::PARTIALLY_BOUND + } else { + hal::BindGroupLayoutFlags::empty() + }, entries: &[ wgt::BindGroupLayoutEntry { binding: 0, @@ -286,7 +292,7 @@ impl Example { let texture_data = [0xFFu8; 4]; let staging_buffer_desc = hal::BufferDescriptor { - label: Some("stage"), + label: Some("Staging Buffer"), size: texture_data.len() as wgt::BufferAddress, usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC, memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, @@ -305,77 +311,24 @@ impl Example { assert!(mapping.is_coherent); } - let texture_desc = hal::TextureDescriptor { - label: None, - size: wgt::Extent3d { - width: 1, - height: 1, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: wgt::TextureDimension::D2, - format: wgt::TextureFormat::Rgba8UnormSrgb, - usage: hal::TextureUses::COPY_DST | hal::TextureUses::RESOURCE, - memory_flags: hal::MemoryFlags::empty(), - view_formats: vec![], - }; - let texture = unsafe { device.create_texture(&texture_desc).unwrap() }; - let cmd_encoder_desc = hal::CommandEncoderDescriptor { label: None, queue: &queue, }; let mut cmd_encoder = unsafe { device.create_command_encoder(&cmd_encoder_desc).unwrap() }; unsafe { cmd_encoder.begin_encoding(Some("init")).unwrap() }; - { - let buffer_barrier = hal::BufferBarrier { - buffer: &staging_buffer, - usage: hal::StateTransition { - from: hal::BufferUses::empty(), - to: hal::BufferUses::COPY_SRC, - }, - }; - let texture_barrier1 = hal::TextureBarrier { - texture: &texture, - range: wgt::ImageSubresourceRange::default(), - usage: hal::StateTransition { - from: hal::TextureUses::UNINITIALIZED, - to: hal::TextureUses::COPY_DST, - }, - }; - let texture_barrier2 = hal::TextureBarrier { - texture: &texture, - range: wgt::ImageSubresourceRange::default(), - usage: hal::StateTransition { - from: hal::TextureUses::COPY_DST, - to: hal::TextureUses::RESOURCE, - }, - }; - let copy = hal::BufferTextureCopy { - buffer_layout: wgt::TexelCopyBufferLayout { - offset: 0, - bytes_per_row: Some(4), - rows_per_image: None, - }, - texture_base: hal::TextureCopyBase { - origin: wgt::Origin3d::ZERO, - mip_level: 0, - array_layer: 0, - aspect: hal::FormatAspects::COLOR, - }, - size: hal::CopyExtent { - width: 1, - height: 1, - depth: 1, - }, - }; - unsafe { - cmd_encoder.transition_buffers(iter::once(buffer_barrier)); - cmd_encoder.transition_textures(iter::once(texture_barrier1)); - cmd_encoder.copy_buffer_to_texture(&staging_buffer, &texture, iter::once(copy)); - cmd_encoder.transition_textures(iter::once(texture_barrier2)); - } + + let texture_count = if MUTABLE_BIND_GROUP { 6 } else { 1 }; + let mut textures = Vec::with_capacity(texture_count); + let mut staging_buffers = Vec::with_capacity(texture_count); + for i in 0..texture_count { + let (texture, texture_view, staging_buffer) = Self::create_texture( + (i + 1) as f32 / texture_count as f32, + &device, + &mut cmd_encoder, + ); + textures.push((texture, texture_view)); + staging_buffers.push(staging_buffer); } let sampler_desc = hal::SamplerDescriptor { @@ -436,15 +389,6 @@ impl Example { }; let local_buffer = unsafe { 
device.create_buffer(&local_buffer_desc).unwrap() };
- let view_desc = hal::TextureViewDescriptor {
- label: None,
- format: texture_desc.format,
- dimension: wgt::TextureViewDimension::D2,
- usage: hal::TextureUses::RESOURCE,
- range: wgt::ImageSubresourceRange::default(),
- };
- let texture_view = unsafe { device.create_texture_view(&texture, &view_desc).unwrap() };
-
let global_group = {
let global_buffer_binding = hal::BufferBinding {
buffer: &global_buffer,
@@ -452,11 +396,16 @@
size: None,
};
let texture_binding = hal::TextureBinding {
- view: &texture_view,
+ view: &textures[0].1,
usage: hal::TextureUses::RESOURCE,
};
let global_group_desc = hal::BindGroupDescriptor {
label: Some("global"),
+ flags: if MUTABLE_BIND_GROUP {
+ hal::BindGroupFlags::ALLOW_UPDATES
+ } else {
+ hal::BindGroupFlags::empty()
+ },
layout: &global_group_layout,
buffers: &[global_buffer_binding],
samplers: &[&sampler],
@@ -467,20 +416,26 @@
binding: 0,
resource_index: 0,
count: 1,
+ array_element_offset: None,
},
hal::BindGroupEntry {
binding: 1,
resource_index: 0,
count: 1,
+ array_element_offset: None,
},
hal::BindGroupEntry {
binding: 2,
resource_index: 0,
count: 1,
+ array_element_offset: None,
},
],
};
- unsafe { device.create_bind_group(&global_group_desc).unwrap() }
+ [
+ unsafe { device.create_bind_group(&global_group_desc).unwrap() },
+ unsafe { device.create_bind_group(&global_group_desc).unwrap() },
+ ]
};
let local_group = {
@@ -491,6 +446,7 @@
};
let local_group_desc = hal::BindGroupDescriptor {
label: Some("local"),
+ flags: hal::BindGroupFlags::empty(),
layout: &local_group_layout,
buffers: &[local_buffer_binding],
samplers: &[],
@@ -499,6 +455,7 @@
entries: &[hal::BindGroupEntry {
binding: 0,
resource_index: 0,
+ array_element_offset: None,
count: 1,
}],
};
@@ -517,6 +474,11 @@
cmd_encoder.reset_all(iter::once(init_cmd));
fence
};
+ for staging_buffer in staging_buffers.drain(..) {
+ unsafe {
+ device.destroy_buffer(staging_buffer);
+ }
+ }
Ok(Example {
instance,
@@ -537,8 +499,7 @@
local_alignment,
global_buffer,
sampler,
- texture,
- texture_view,
+ textures,
contexts: vec![ExecutionContext {
encoder: cmd_encoder,
fence,
@@ -573,11 +534,15 @@
}
self.device.destroy_bind_group(self.local_group);
- self.device.destroy_bind_group(self.global_group);
+ for global_group in self.global_group.into_iter() {
+ self.device.destroy_bind_group(global_group);
+ }
self.device.destroy_buffer(self.local_buffer);
self.device.destroy_buffer(self.global_buffer);
- self.device.destroy_texture_view(self.texture_view);
- self.device.destroy_texture(self.texture);
+ for (texture, texture_view) in self.textures.drain(..) 
{ + self.device.destroy_texture_view(texture_view); + self.device.destroy_texture(texture); + } self.device.destroy_sampler(self.sampler); self.device.destroy_shader_module(self.shader); self.device.destroy_render_pipeline(self.pipeline); @@ -627,6 +592,112 @@ impl Example { } } + fn create_texture( + value: f32, + device: &A::Device, + encoder: &mut A::CommandEncoder, + ) -> (A::Texture, A::TextureView, A::Buffer) { + let texture_data = [(value.fract() * u8::MAX as f32) as u8; 4]; + + let staging_buffer_desc = hal::BufferDescriptor { + label: Some("Staging Buffer"), + size: texture_data.len() as wgt::BufferAddress, + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }; + let staging_buffer = unsafe { device.create_buffer(&staging_buffer_desc).unwrap() }; + unsafe { + let mapping = device + .map_buffer(&staging_buffer, 0..staging_buffer_desc.size) + .unwrap(); + ptr::copy_nonoverlapping( + texture_data.as_ptr(), + mapping.ptr.as_ptr(), + texture_data.len(), + ); + device.unmap_buffer(&staging_buffer); + assert!(mapping.is_coherent); + } + + let texture_desc = hal::TextureDescriptor { + label: Some("Halmark Texture (Updated)"), + size: wgt::Extent3d { + width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgt::TextureDimension::D2, + format: wgt::TextureFormat::Rgba8UnormSrgb, + usage: hal::TextureUses::COPY_DST | hal::TextureUses::RESOURCE, + memory_flags: hal::MemoryFlags::empty(), + view_formats: vec![], + }; + let texture = unsafe { device.create_texture(&texture_desc).unwrap() }; + + { + let buffer_barrier = hal::BufferBarrier { + buffer: &staging_buffer, + usage: hal::StateTransition { + from: hal::BufferUses::empty(), + to: hal::BufferUses::COPY_SRC, + }, + }; + let texture_barrier1 = hal::TextureBarrier { + texture: &texture, + range: wgt::ImageSubresourceRange::default(), + usage: hal::StateTransition { + from: hal::TextureUses::UNINITIALIZED, + to: hal::TextureUses::COPY_DST, + }, + }; + let texture_barrier2 = hal::TextureBarrier { + texture: &texture, + range: wgt::ImageSubresourceRange::default(), + usage: hal::StateTransition { + from: hal::TextureUses::COPY_DST, + to: hal::TextureUses::RESOURCE, + }, + }; + let copy = hal::BufferTextureCopy { + buffer_layout: wgt::TexelCopyBufferLayout { + offset: 0, + bytes_per_row: Some(4), + rows_per_image: None, + }, + texture_base: hal::TextureCopyBase { + origin: wgt::Origin3d::ZERO, + mip_level: 0, + array_layer: 0, + aspect: hal::FormatAspects::COLOR, + }, + size: hal::CopyExtent { + width: 1, + height: 1, + depth: 1, + }, + }; + unsafe { + encoder.transition_buffers(iter::once(buffer_barrier)); + encoder.transition_textures(iter::once(texture_barrier1)); + encoder.copy_buffer_to_texture(&staging_buffer, &texture, iter::once(copy)); + encoder.transition_textures(iter::once(texture_barrier2)); + } + } + + let view_desc = hal::TextureViewDescriptor { + label: None, + format: texture_desc.format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::RESOURCE, + range: wgt::ImageSubresourceRange::default(), + }; + let texture_view = unsafe { device.create_texture_view(&texture, &view_desc).unwrap() }; + + (texture, texture_view, staging_buffer) + } + fn render(&mut self) { let delta = 0.01; for bunny in self.bunnies.iter_mut() { @@ -662,6 +733,41 @@ impl Example { } let ctx = &mut self.contexts[self.context_index]; + let frame = ctx.fence_value as usize; + let 
global_group_idx = if MUTABLE_BIND_GROUP { + (frame / MUTABLE_BIND_GROUP_UPDATE_FRAMES) % 2 + } else { + 0 + }; + if MUTABLE_BIND_GROUP && frame % MUTABLE_BIND_GROUP_UPDATE_FRAMES == 0 { + let texture_id = frame % self.textures.len(); + let texture_view = &self.textures[texture_id].1; + let texture_binding = hal::TextureBinding { + view: texture_view, + usage: hal::TextureUses::RESOURCE, + }; + + unsafe { + self.device + .update_bind_group( + &self.global_group[global_group_idx], + &hal::UpdateBindGroupDescriptor { + layout: &self.global_group_layout, + entries: &[hal::BindGroupEntry { + binding: 1, + resource_index: 0, + count: 1, + array_element_offset: None, + }], + buffers: &[], + samplers: &[], + textures: &[texture_binding], + acceleration_structures: &[], + }, + ) + .unwrap(); + } + } let surface_tex = unsafe { self.surface @@ -726,8 +832,12 @@ impl Example { unsafe { ctx.encoder.begin_render_pass(&pass_desc); ctx.encoder.set_render_pipeline(&self.pipeline); - ctx.encoder - .set_bind_group(&self.pipeline_layout, 0, &self.global_group, &[]); + ctx.encoder.set_bind_group( + &self.pipeline_layout, + 0, + &self.global_group[global_group_idx], + &[], + ); } for i in 0..self.bunnies.len() { diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index b81ef86525..2bf833595b 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -608,6 +608,7 @@ impl Example { }; let group_desc = hal::BindGroupDescriptor { label: Some("bind group"), + flags: hal::BindGroupFlags::empty(), layout: &bgl, buffers: &[buffer_binding], samplers: &[], @@ -618,16 +619,19 @@ impl Example { binding: 0, resource_index: 0, count: 1, + array_element_offset: None, }, hal::BindGroupEntry { binding: 1, resource_index: 0, count: 1, + array_element_offset: None, }, hal::BindGroupEntry { binding: 2, resource_index: 0, count: 1, + array_element_offset: None, }, ], }; diff --git a/wgpu-hal/src/dx12/descriptor.rs b/wgpu-hal/src/dx12/descriptor.rs index f3b7f26f25..cb6ca8521b 100644 --- a/wgpu-hal/src/dx12/descriptor.rs +++ b/wgpu-hal/src/dx12/descriptor.rs @@ -27,11 +27,11 @@ impl fmt::Debug for DualHandle { } } -type DescriptorIndex = u64; +pub(super) type DescriptorIndex = u64; pub(super) struct GeneralHeap { pub raw: Direct3D12::ID3D12DescriptorHeap, - ty: Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE, + pub(super) ty: Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE, handle_size: u64, total_handles: u64, start: DualHandle, @@ -81,7 +81,7 @@ impl GeneralHeap { } } - fn cpu_descriptor_at(&self, index: u64) -> Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE { + pub(super) fn cpu_descriptor_at(&self, index: u64) -> Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE { Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE { ptr: self.start.cpu.ptr + (self.handle_size * index) as usize, } @@ -93,6 +93,10 @@ impl GeneralHeap { } } + pub(super) fn gpu_descriptor_index(&self, handle: DualHandle) -> DescriptorIndex { + (handle.gpu.ptr - self.start.gpu.ptr) / self.handle_size + } + pub(super) fn allocate_slice(&self, count: u64) -> Result { let range = self.ranges.lock().allocate_range(count).map_err(|err| { log::error!("Unable to allocate descriptors: {:?}", err); @@ -248,7 +252,7 @@ pub(super) struct CpuHeap { pub inner: Mutex, start: Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE, handle_size: u32, - total: u32, + pub total: u32, } unsafe impl Send for CpuHeap {} @@ -300,24 +304,58 @@ impl fmt::Debug for CpuHeap { } } +/// This function allocates `count` descriptors on the GPU's 
GeneralHeap under `dst` and then calls +/// `CopyDescriptors` to copy from the CPU heap under `src` into the allocated range in the GPU heap. +/// +/// [`CopyDescriptors` documentation](https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12device-copydescriptors) +/// +/// This function copies from a sequence of CPU descriptors to a single GPU descriptor range, and +/// so it's only used when creating or fully updating a `BindGroup` that's fully bound. +/// When we have a partially bound `BindGroup`, it's best to write to multiple GPU descriptor ranges. pub(super) unsafe fn upload( device: &Direct3D12::ID3D12Device, src: &CpuHeapInner, - dst: &GeneralHeap, + dst_ty: Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE, + dst_handle: Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE, + // All 1's dummy_copy_counts: &[u32], -) -> Result { +) { let count = src.stage.len() as u32; - let index = dst.allocate_slice(count as u64)?; unsafe { + // In this function, we write to a single destination descriptor range from `count` source + // descriptor ranges (which are all the CPU descriptors referenced in the `src.stage` vector). + // The `dummy_copy_counts` array is exclusively ones. device.CopyDescriptors( 1, - &dst.cpu_descriptor_at(index), + &dst_handle, Some(&count), count, src.stage.as_ptr(), Some(dummy_copy_counts.as_ptr()), - dst.ty, - ) - }; - Ok(dst.at(index, count as u64)) + dst_ty, + ); + } +} + +pub(super) unsafe fn multi_update( + device: &Direct3D12::ID3D12Device, + src: &CpuHeapInner, + dst_ty: Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE, + dst_handles: &[Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE], + dst_descriptor_range_sizes: &[u32], + // All 1's + dummy_copy_counts: &[u32], +) { + debug_assert_eq!(dst_handles.len(), dst_descriptor_range_sizes.len()); + unsafe { + device.CopyDescriptors( + dst_handles.len() as u32, + dst_handles.as_ptr(), + Some(dst_descriptor_range_sizes.as_ptr()), + src.stage.len() as u32, + src.stage.as_ptr(), + Some(dummy_copy_counts.as_ptr()), + dst_ty, + ); + } } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 20dc20164f..f094db8bed 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1,3 +1,4 @@ +use parking_lot::{Mutex, MutexGuard}; use std::{ ffi, mem::{self, size_of, size_of_val}, @@ -6,8 +7,6 @@ use std::{ sync::Arc, time::{Duration, Instant}, }; - -use parking_lot::Mutex; use windows::{ core::Interface as _, Win32::{ @@ -27,6 +26,16 @@ use crate::{ // this has to match Naga's HLSL backend, and also needs to be null-terminated const NAGA_LOCATION_SEMANTIC: &[u8] = b"LOC\0"; +struct WriteDescriptorsOutput<'a> { + view_cpu_heap: Option>, + view_range_sizes: Vec, + view_handles: Vec, + sampler_cpu_heap: Option>, + sampler_sizes: Vec, + sampler_handles: Vec, + dynamic_buffers: Vec, +} + impl super::Device { pub(super) fn new( raw: Direct3D12::ID3D12Device, @@ -394,6 +403,227 @@ impl super::Device { allocation: None, } } + + #[allow(clippy::too_many_arguments)] + fn write_descriptors<'a>( + &self, + dst_views_index: Option, + dst_samplers_index: Option, + layout: &'a super::BindGroupLayout, + buffers: &[crate::BufferBinding], + samplers: &[&super::Sampler], + textures: &[crate::TextureBinding], + entries: &[crate::BindGroupEntry], + _acceleration_structures: &[&super::AccelerationStructure], + ) -> WriteDescriptorsOutput<'a> { + let partially_bound = layout + .flags + .contains(crate::BindGroupLayoutFlags::PARTIALLY_BOUND); + let mut cpu_views = layout + .scratch_views_cpu_heap + .as_ref() + .map(|cpu_heap| 
cpu_heap.inner.lock()); + if let Some(ref mut inner) = cpu_views { + inner.stage.clear(); + } + let mut cpu_samplers = layout + .scratch_sampler_cpu_heap + .as_ref() + .map(|cpu_heap| cpu_heap.inner.lock()); + if let Some(ref mut inner) = cpu_samplers { + inner.stage.clear(); + } + let mut dynamic_buffers = Vec::new(); + + // A multi-update destination range can be done per entry, where the range size is the + // `count` and the handle is the `dst.cpu_descriptor_at(index)` offset by the stage length + // and array offset. + let mut multi_update_dst_range_view_sizes = Vec::new(); + let mut multi_update_dst_range_view_handles = Vec::new(); + let mut multi_update_dst_range_sampler_sizes = Vec::new(); + let mut multi_update_dst_range_sampler_handles = Vec::new(); + + let layout_and_entry_iter = entries.iter().map(|entry| { + let (layout, descriptor_offset) = layout + .entries + .iter() + .zip(layout.entry_binding_descriptor_offsets.iter()) + .find(|(layout_entry, _)| layout_entry.binding == entry.binding) + .expect("internal error: no layout entry found with binding slot"); + (layout, entry, *descriptor_offset) + }); + for (layout_entry, entry, descriptor_offset) in layout_and_entry_iter { + // We can't skip array elements if the bind group isn't partially bound + if !partially_bound { + debug_assert_eq!(entry.array_element_offset.unwrap_or(0), 0); + } + + match layout_entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => { + if partially_bound { + panic!("Dynamic buffers are not supported in partially bound bind groups in DX12"); + } + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &buffers[start..end] { + dynamic_buffers.push(Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE { + ptr: data.resolve_address(), + }); + } + } + wgt::BindingType::Buffer { ty, .. 
} => { + let inner = cpu_views.as_mut().unwrap(); + let cpu_descriptor_index = inner.stage.len(); + + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + let mut cpu_index = + cpu_descriptor_index as u32 + entry.array_element_offset.unwrap_or(0); + for data in &buffers[start..end] { + let gpu_address = data.resolve_address(); + let size = data.resolve_size() as u32; + let handle = layout + .scratch_views_cpu_heap + .as_ref() + .unwrap() + .at(cpu_index); + match ty { + wgt::BufferBindingType::Uniform => { + let size_mask = + Direct3D12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1; + let raw_desc = Direct3D12::D3D12_CONSTANT_BUFFER_VIEW_DESC { + BufferLocation: gpu_address, + SizeInBytes: ((size - 1) | size_mask) + 1, + }; + unsafe { + self.raw.CreateConstantBufferView(Some(&raw_desc), handle) + }; + } + wgt::BufferBindingType::Storage { read_only: true } => { + let raw_desc = Direct3D12::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: Dxgi::Common::DXGI_FORMAT_R32_TYPELESS, + Shader4ComponentMapping: + Direct3D12::D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + ViewDimension: Direct3D12::D3D12_SRV_DIMENSION_BUFFER, + Anonymous: Direct3D12::D3D12_SHADER_RESOURCE_VIEW_DESC_0 { + Buffer: Direct3D12::D3D12_BUFFER_SRV { + FirstElement: data.offset / 4, + NumElements: size / 4, + StructureByteStride: 0, + Flags: Direct3D12::D3D12_BUFFER_SRV_FLAG_RAW, + }, + }, + }; + unsafe { + self.raw.CreateShaderResourceView( + &data.buffer.resource, + Some(&raw_desc), + handle, + ) + }; + } + wgt::BufferBindingType::Storage { read_only: false } => { + let raw_desc = Direct3D12::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: Dxgi::Common::DXGI_FORMAT_R32_TYPELESS, + ViewDimension: Direct3D12::D3D12_UAV_DIMENSION_BUFFER, + Anonymous: Direct3D12::D3D12_UNORDERED_ACCESS_VIEW_DESC_0 { + Buffer: Direct3D12::D3D12_BUFFER_UAV { + FirstElement: data.offset / 4, + NumElements: size / 4, + StructureByteStride: 0, + CounterOffsetInBytes: 0, + Flags: Direct3D12::D3D12_BUFFER_UAV_FLAG_RAW, + }, + }, + }; + unsafe { + self.raw.CreateUnorderedAccessView( + &data.buffer.resource, + None, + Some(&raw_desc), + handle, + ) + }; + } + } + inner.stage.push(handle); + cpu_index += 1; + } + multi_update_dst_range_view_sizes.push(entry.count); + multi_update_dst_range_view_handles.push( + self.shared.heap_views.cpu_descriptor_at( + dst_views_index.unwrap() + + descriptor_offset as u64 + + entry.array_element_offset.unwrap_or(0) as u64, + ), + ); + } + wgt::BindingType::Texture { .. } => { + let inner = cpu_views.as_mut().unwrap(); + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &textures[start..end] { + let handle = data.view.handle_srv.unwrap(); + inner.stage.push(handle.raw); + } + multi_update_dst_range_view_sizes.push(entry.count); + multi_update_dst_range_view_handles.push( + self.shared.heap_views.cpu_descriptor_at( + dst_views_index.unwrap() + + descriptor_offset as u64 + + entry.array_element_offset.unwrap_or(0) as u64, + ), + ); + } + wgt::BindingType::StorageTexture { .. 
} => { + let inner = cpu_views.as_mut().unwrap(); + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &textures[start..end] { + let handle = data.view.handle_uav.unwrap(); + inner.stage.push(handle.raw); + } + multi_update_dst_range_view_sizes.push(entry.count); + multi_update_dst_range_view_handles.push( + self.shared.heap_views.cpu_descriptor_at( + dst_views_index.unwrap() + + descriptor_offset as u64 + + entry.array_element_offset.unwrap_or(0) as u64, + ), + ); + } + wgt::BindingType::Sampler { .. } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &samplers[start..end] { + cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw); + } + multi_update_dst_range_sampler_sizes.push(entry.count); + multi_update_dst_range_sampler_handles.push( + self.shared.heap_samplers.cpu_descriptor_at( + dst_samplers_index.unwrap() + + descriptor_offset as u64 + + entry.array_element_offset.unwrap_or(0) as u64, + ), + ); + } + wgt::BindingType::AccelerationStructure => todo!(), + } + } + + WriteDescriptorsOutput { + view_cpu_heap: cpu_views, + view_range_sizes: multi_update_dst_range_view_sizes, + view_handles: multi_update_dst_range_view_handles, + sampler_cpu_heap: cpu_samplers, + sampler_sizes: multi_update_dst_range_sampler_sizes, + sampler_handles: multi_update_dst_range_sampler_handles, + dynamic_buffers, + } + } } impl crate::Device for super::Device { @@ -764,8 +994,25 @@ impl crate::Device for super::Device { desc: &crate::BindGroupLayoutDescriptor, ) -> Result { let (mut num_buffer_views, mut num_samplers, mut num_texture_views) = (0, 0, 0); + let mut entry_binding_descriptor_offsets = Vec::with_capacity(desc.entries.len()); for entry in desc.entries.iter() { let count = entry.count.map_or(1, NonZeroU32::get); + match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: false, + .. + } + | wgt::BindingType::Texture { .. } + | wgt::BindingType::StorageTexture { .. 
} => { + entry_binding_descriptor_offsets.push(num_buffer_views + num_texture_views); + } + wgt::BindingType::Sampler(_) => { + entry_binding_descriptor_offsets.push(num_samplers); + } + _ => { + entry_binding_descriptor_offsets.push(0); + } + } match entry.ty { wgt::BindingType::Buffer { has_dynamic_offset: true, @@ -785,7 +1032,8 @@ impl crate::Device for super::Device { let num_views = num_buffer_views + num_texture_views; Ok(super::BindGroupLayout { entries: desc.entries.to_vec(), - cpu_heap_views: if num_views != 0 { + entry_binding_descriptor_offsets, + scratch_views_cpu_heap: if num_views != 0 { let heap = descriptor::CpuHeap::new( &self.raw, Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, @@ -795,7 +1043,7 @@ impl crate::Device for super::Device { } else { None }, - cpu_heap_samplers: if num_samplers != 0 { + scratch_sampler_cpu_heap: if num_samplers != 0 { let heap = descriptor::CpuHeap::new( &self.raw, Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, @@ -806,6 +1054,7 @@ impl crate::Device for super::Device { None }, copy_counts: vec![1; num_views.max(num_samplers) as usize], + flags: desc.flags, }) } @@ -1254,172 +1503,107 @@ impl crate::Device for super::Device { super::AccelerationStructure, >, ) -> Result { - let mut cpu_views = desc + let views_count = desc .layout - .cpu_heap_views + .scratch_views_cpu_heap .as_ref() - .map(|cpu_heap| cpu_heap.inner.lock()); - if let Some(ref mut inner) = cpu_views { - inner.stage.clear(); - } - let mut cpu_samplers = desc + .map(|it| it.total); + let samplers_count = desc .layout - .cpu_heap_samplers + .scratch_sampler_cpu_heap .as_ref() - .map(|cpu_heap| cpu_heap.inner.lock()); - if let Some(ref mut inner) = cpu_samplers { - inner.stage.clear(); - } - let mut dynamic_buffers = Vec::new(); + .map(|it| it.total); - let layout_and_entry_iter = desc.entries.iter().map(|entry| { - let layout = desc - .layout - .entries - .iter() - .find(|layout_entry| layout_entry.binding == entry.binding) - .expect("internal error: no layout entry found with binding slot"); - (layout, entry) - }); - for (layout, entry) in layout_and_entry_iter { - match layout.ty { - wgt::BindingType::Buffer { - has_dynamic_offset: true, - .. - } => { - let start = entry.resource_index as usize; - let end = start + entry.count as usize; - for data in &desc.buffers[start..end] { - dynamic_buffers.push(Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE { - ptr: data.resolve_address(), - }); - } - } - wgt::BindingType::Buffer { ty, .. 
} => { - let start = entry.resource_index as usize; - let end = start + entry.count as usize; - for data in &desc.buffers[start..end] { - let gpu_address = data.resolve_address(); - let size = data.resolve_size() as u32; - let inner = cpu_views.as_mut().unwrap(); - let cpu_index = inner.stage.len() as u32; - let handle = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index); - match ty { - wgt::BufferBindingType::Uniform => { - let size_mask = - Direct3D12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1; - let raw_desc = Direct3D12::D3D12_CONSTANT_BUFFER_VIEW_DESC { - BufferLocation: gpu_address, - SizeInBytes: ((size - 1) | size_mask) + 1, - }; - unsafe { - self.raw.CreateConstantBufferView(Some(&raw_desc), handle) - }; - } - wgt::BufferBindingType::Storage { read_only: true } => { - let raw_desc = Direct3D12::D3D12_SHADER_RESOURCE_VIEW_DESC { - Format: Dxgi::Common::DXGI_FORMAT_R32_TYPELESS, - Shader4ComponentMapping: - Direct3D12::D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, - ViewDimension: Direct3D12::D3D12_SRV_DIMENSION_BUFFER, - Anonymous: Direct3D12::D3D12_SHADER_RESOURCE_VIEW_DESC_0 { - Buffer: Direct3D12::D3D12_BUFFER_SRV { - FirstElement: data.offset / 4, - NumElements: size / 4, - StructureByteStride: 0, - Flags: Direct3D12::D3D12_BUFFER_SRV_FLAG_RAW, - }, - }, - }; - unsafe { - self.raw.CreateShaderResourceView( - &data.buffer.resource, - Some(&raw_desc), - handle, - ) - }; - } - wgt::BufferBindingType::Storage { read_only: false } => { - let raw_desc = Direct3D12::D3D12_UNORDERED_ACCESS_VIEW_DESC { - Format: Dxgi::Common::DXGI_FORMAT_R32_TYPELESS, - ViewDimension: Direct3D12::D3D12_UAV_DIMENSION_BUFFER, - Anonymous: Direct3D12::D3D12_UNORDERED_ACCESS_VIEW_DESC_0 { - Buffer: Direct3D12::D3D12_BUFFER_UAV { - FirstElement: data.offset / 4, - NumElements: size / 4, - StructureByteStride: 0, - CounterOffsetInBytes: 0, - Flags: Direct3D12::D3D12_BUFFER_UAV_FLAG_RAW, - }, - }, - }; - unsafe { - self.raw.CreateUnorderedAccessView( - &data.buffer.resource, - None, - Some(&raw_desc), - handle, - ) - }; - } - } - inner.stage.push(handle); - } - } - wgt::BindingType::Texture { .. } => { - let start = entry.resource_index as usize; - let end = start + entry.count as usize; - for data in &desc.textures[start..end] { - let handle = data.view.handle_srv.unwrap(); - cpu_views.as_mut().unwrap().stage.push(handle.raw); - } - } - wgt::BindingType::StorageTexture { .. } => { - let start = entry.resource_index as usize; - let end = start + entry.count as usize; - for data in &desc.textures[start..end] { - let handle = data.view.handle_uav.unwrap(); - cpu_views.as_mut().unwrap().stage.push(handle.raw); - } - } - wgt::BindingType::Sampler { .. 
} => { - let start = entry.resource_index as usize; - let end = start + entry.count as usize; - for data in &desc.samplers[start..end] { - cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw); - } - } - wgt::BindingType::AccelerationStructure => todo!(), - } - } + let views_allocation = match views_count { + Some(it) => Some(self.shared.heap_views.allocate_slice(it as u64)?), + None => None, + }; + let samplers_allocation = match samplers_count { + Some(it) => Some(self.shared.heap_samplers.allocate_slice(it as u64)?), + None => None, + }; + + let update_params = self.write_descriptors( + views_allocation, + samplers_allocation, + desc.layout, + desc.buffers, + desc.samplers, + desc.textures, + desc.entries, + desc.acceleration_structures, + ); + let partially_bound = desc + .layout + .flags + .contains(crate::BindGroupLayoutFlags::PARTIALLY_BOUND); - let handle_views = match cpu_views { - Some(inner) => { - let dual = unsafe { + let handle_views = if let Some(cpu_views) = update_params.view_cpu_heap { + if partially_bound { + unsafe { + descriptor::multi_update( + &self.raw, + &cpu_views, + self.shared.heap_views.ty, + &update_params.view_handles, + &update_params.view_range_sizes, + &desc.layout.copy_counts, + ); + } + } else { + unsafe { descriptor::upload( &self.raw, - &inner, - &self.shared.heap_views, + &cpu_views, + self.shared.heap_views.ty, + self.shared + .heap_views + .cpu_descriptor_at(views_allocation.unwrap()), &desc.layout.copy_counts, - ) - }?; - Some(dual) + ); + } } - None => None, + Some( + self.shared + .heap_views + .at(views_allocation.unwrap(), views_count.unwrap() as u64), + ) + } else { + None }; - let handle_samplers = match cpu_samplers { - Some(inner) => { - let dual = unsafe { + + let handle_samplers = if let Some(cpu_samplers) = update_params.sampler_cpu_heap { + if partially_bound { + unsafe { + descriptor::multi_update( + &self.raw, + &cpu_samplers, + self.shared.heap_samplers.ty, + &update_params.sampler_handles, + &update_params.sampler_sizes, + &desc.layout.copy_counts, + ); + } + } else { + unsafe { descriptor::upload( &self.raw, - &inner, - &self.shared.heap_samplers, + &cpu_samplers, + self.shared.heap_samplers.ty, + self.shared + .heap_samplers + .cpu_descriptor_at(samplers_allocation.unwrap()), &desc.layout.copy_counts, - ) - }?; - Some(dual) + ); + } } - None => None, + Some( + self.shared + .heap_samplers + .at(samplers_allocation.unwrap(), samplers_count.unwrap() as u64), + ) + } else { + None }; self.counters.bind_groups.add(1); @@ -1427,10 +1611,101 @@ impl crate::Device for super::Device { Ok(super::BindGroup { handle_views, handle_samplers, - dynamic_buffers, + dynamic_buffers: update_params.dynamic_buffers, }) } + unsafe fn update_bind_group( + &self, + bind_group: &::BindGroup, + desc: &crate::UpdateBindGroupDescriptor< + ::BindGroupLayout, + ::Buffer, + ::Sampler, + ::TextureView, + ::AccelerationStructure, + >, + ) -> Result<(), crate::DeviceError> { + let views_index = bind_group + .handle_views + .map(|it| self.shared.heap_views.gpu_descriptor_index(it)); + let samplers_index = bind_group + .handle_samplers + .map(|it| self.shared.heap_samplers.gpu_descriptor_index(it)); + let update_params = self.write_descriptors( + views_index, + samplers_index, + desc.layout, + desc.buffers, + desc.samplers, + desc.textures, + desc.entries, + desc.acceleration_structures, + ); + let partially_bound = desc + .layout + .flags + .contains(crate::BindGroupLayoutFlags::PARTIALLY_BOUND); + if let Some(cpu_views) = update_params.view_cpu_heap { + if 
partially_bound { + if !update_params.view_handles.is_empty() { + unsafe { + descriptor::multi_update( + &self.raw, + &cpu_views, + self.shared.heap_views.ty, + &update_params.view_handles, + &update_params.view_range_sizes, + &desc.layout.copy_counts, + ); + } + } + } else { + unsafe { + descriptor::upload( + &self.raw, + &cpu_views, + self.shared.heap_views.ty, + self.shared + .heap_views + .cpu_descriptor_at(views_index.unwrap()), + &desc.layout.copy_counts, + ); + } + } + } + + if let Some(cpu_samplers) = update_params.sampler_cpu_heap { + if partially_bound { + if !update_params.sampler_handles.is_empty() { + unsafe { + descriptor::multi_update( + &self.raw, + &cpu_samplers, + self.shared.heap_samplers.ty, + &update_params.sampler_handles, + &update_params.sampler_sizes, + &desc.layout.copy_counts, + ); + } + } + } else { + unsafe { + descriptor::upload( + &self.raw, + &cpu_samplers, + self.shared.heap_samplers.ty, + self.shared + .heap_samplers + .cpu_descriptor_at(samplers_index.unwrap()), + &desc.layout.copy_counts, + ); + } + } + } + Ok(()) + } + unsafe fn destroy_bind_group(&self, group: super::BindGroup) { if let Some(dual) = group.handle_views { self.shared.heap_views.free_slice(dual); diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index d58d79300a..d81e3146c8 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -892,9 +892,21 @@ impl Fence { pub struct BindGroupLayout { /// Sorted list of entries. entries: Vec, - cpu_heap_views: Option, - cpu_heap_samplers: Option, + /// For each item in `entries`, this has its descriptor offset in the corresponding heap (SRV/CBV/UAV heap or + /// Sampler heap). + /// + /// The value is the sum of the `count` of every previous value in the `entries` list (or 1 if the `count` is `None`). + /// + /// This is used for creating or updating the descriptors using this layout. + entry_binding_descriptor_offsets: Vec, + // These CPU heaps are stored to be re-used whenever a new `BindGroup` is created using this layout. + // In `create_bind_group`, the mutex for the re-usable heaps is acquired, the stage buffer is cleared + // and it is used before a call to CopyDescriptors copies it to the GPU, then it is ready to be + // used again for another creation. 
+ scratch_views_cpu_heap: Option, + scratch_sampler_cpu_heap: Option, copy_counts: Vec, // all 1's + flags: crate::BindGroupLayoutFlags, } impl crate::DynBindGroupLayout for BindGroupLayout {} diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs index 9366270bcf..367346d4f9 100644 --- a/wgpu-hal/src/dynamic/device.rs +++ b/wgpu-hal/src/dynamic/device.rs @@ -336,6 +336,7 @@ impl DynDevice for D { let desc = BindGroupDescriptor { label: desc.label.to_owned(), + flags: desc.flags, layout: desc.layout.expect_downcast_ref(), buffers: &buffers, samplers: &samplers, diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index dd1e183ed2..2af8e74e66 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -227,6 +227,21 @@ impl crate::Device for Context { ) -> DeviceResult { Ok(Resource) } + + unsafe fn update_bind_group( + &self, + bind_group: &::BindGroup, + desc: &crate::UpdateBindGroupDescriptor< + ::BindGroupLayout, + ::Buffer, + ::Sampler, + ::TextureView, + ::AccelerationStructure, + >, + ) -> Result<(), crate::DeviceError> { + Ok(()) + } + unsafe fn destroy_bind_group(&self, group: Resource) {} unsafe fn create_shader_module( diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index 0df9568698..fdf6c4f78d 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -1313,6 +1313,20 @@ impl crate::Device for super::Device { }) } + unsafe fn update_bind_group( + &self, + _bind_group: &::BindGroup, + _desc: &crate::UpdateBindGroupDescriptor< + ::BindGroupLayout, + ::Buffer, + ::Sampler, + ::TextureView, + ::AccelerationStructure, + >, + ) -> Result<(), crate::DeviceError> { + todo!() + } + unsafe fn destroy_bind_group(&self, _group: super::BindGroup) { self.counters.bind_groups.sub(1); } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 12234d6364..c458458c06 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -866,6 +866,20 @@ pub trait Device: WasmNotSendSync { ::AccelerationStructure, >, ) -> Result<::BindGroup, DeviceError>; + + #[allow(clippy::type_complexity)] + unsafe fn update_bind_group( + &self, + bind_group: &::BindGroup, + desc: &UpdateBindGroupDescriptor< + ::BindGroupLayout, + ::Buffer, + ::Sampler, + ::TextureView, + ::AccelerationStructure, + >, + ) -> Result<(), DeviceError>; + unsafe fn destroy_bind_group(&self, group: ::BindGroup); unsafe fn create_shader_module( @@ -1531,6 +1545,8 @@ bitflags!( pub struct BindGroupLayoutFlags: u32 { /// Allows for bind group binding arrays to be shorter than the array in the BGL. const PARTIALLY_BOUND = 1 << 0; + /// Allows for the bind group to be updated after the bind group has been bound. + const UPDATE_AFTER_BIND = 1 << 1; } ); @@ -2042,18 +2058,32 @@ impl<'a, T: DynTextureView + ?Sized> Clone for TextureBinding<'a, T> { pub struct BindGroupEntry { pub binding: u32, pub resource_index: u32, + pub array_element_offset: Option, pub count: u32, } +bitflags::bitflags! { + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] + pub struct BindGroupFlags: u8 { + // Allows the bind group to be updated with update_bind_group. + const ALLOW_UPDATES = 1 << 0; + } +} + /// BindGroup descriptor. /// /// Valid usage: -///. - `entries` has to be sorted by ascending `BindGroupEntry::binding` -///. - `entries` has to have the same set of `BindGroupEntry::binding` as `layout` +///. - (Only if `PARTIALLY_BOUND` is **disabled** on the layout): `entries` has to be sorted by +/// ascending `BindGroupEntry::binding` +///. 
- (Only if `PARTIALLY_BOUND` is **disabled** on the layout): `entries` has to have the same
+/// set of `BindGroupEntry::binding` as `layout`
///. - each entry has to be compatible with the `layout`
///. - each entry's `BindGroupEntry::resource_index` is within range
/// of the corresponding resource array, selected by the relevant
/// `BindGroupLayoutEntry`.
+///. - each entry's `BindGroupEntry::array_element_offset` is within the array size given by the
+/// corresponding `BindGroupLayoutEntry::count`. If `array_element_offset` is not `None`, the
+/// layout must have the `PARTIALLY_BOUND` flag set.
#[derive(Clone, Debug)]
pub struct BindGroupDescriptor<
'a,
@@ -2064,6 +2094,7 @@ pub struct BindGroupDescriptor<
A: DynAccelerationStructure + ?Sized,
> {
pub label: Label<'a>,
+ pub flags: BindGroupFlags,
pub layout: &'a Bgl,
pub buffers: &'a [BufferBinding<'a, B>],
pub samplers: &'a [&'a S],
@@ -2072,6 +2103,38 @@ pub struct BindGroupDescriptor<
pub acceleration_structures: &'a [&'a A],
}
+/// Descriptor for updating an existing BindGroup in place.
+///
+/// Valid usage:
+///. - (Only if `PARTIALLY_BOUND` is **disabled** on the layout): `entries` has to be sorted by
+/// ascending `BindGroupEntry::binding`
+///. - (Only if `PARTIALLY_BOUND` is **disabled** on the layout): `entries` has to have the same
+/// set of `BindGroupEntry::binding` as `layout`
+///. - each entry has to be compatible with the `layout`
+///. - each entry's `BindGroupEntry::resource_index` is within range
+/// of the corresponding resource array, selected by the relevant
+/// `BindGroupLayoutEntry`.
+///. - each entry's `BindGroupEntry::array_element_offset` is within the array size given by the
+/// corresponding `BindGroupLayoutEntry::count`. If `array_element_offset` is not `None`, the
+/// layout must have the `PARTIALLY_BOUND` flag set.
+/// - `layout` must match the layout of the bind group that is being updated
+#[derive(Clone, Debug)]
+pub struct UpdateBindGroupDescriptor<
+ 'a,
+ Bgl: DynBindGroupLayout + ?Sized,
+ B: DynBuffer + ?Sized,
+ S: DynSampler + ?Sized,
+ T: DynTextureView + ?Sized,
+ A: DynAccelerationStructure + ?Sized,
+> {
+ pub layout: &'a Bgl,
+ pub entries: &'a [BindGroupEntry],
+ pub buffers: &'a [BufferBinding<'a, B>],
+ pub samplers: &'a [&'a S],
+ pub textures: &'a [TextureBinding<'a, T>],
+ pub acceleration_structures: &'a [&'a A],
+}
+
#[derive(Clone, Debug)]
pub struct CommandEncoderDescriptor<'a, Q: DynQueue + ?Sized> {
pub label: Label<'a>,
diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs
index 73d6bcc0e2..07f40b3408 100644
--- a/wgpu-hal/src/metal/device.rs
+++ b/wgpu-hal/src/metal/device.rs
@@ -891,6 +891,20 @@ impl crate::Device for super::Device {
Ok(bg)
}
+ unsafe fn update_bind_group(
+ &self,
+ _bind_group: &::BindGroup,
+ _desc: &UpdateBindGroupDescriptor<
+ ::BindGroupLayout,
+ ::Buffer,
+ ::Sampler,
+ ::TextureView,
+ ::AccelerationStructure,
+ >,
+ ) -> Result<(), crate::DeviceError> {
+ todo!()
+ }
+
unsafe fn destroy_bind_group(&self, _group: super::BindGroup) {
self.counters.bind_groups.sub(1);
}
diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
index f18177292c..e8302ae84b 100644
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@@ -1007,6 +1007,176 @@ impl super::Device {
pub fn shared_instance(&self) -> &super::InstanceShared {
&self.shared.instance
}
+
+ #[allow(clippy::too_many_arguments)]
+ fn write_descriptors(
+ &self,
+ set: &gpu_descriptor::DescriptorSet,
+ layout: &super::BindGroupLayout,
+ buffers: &[crate::BufferBinding],
+ samplers: &[&super::Sampler],
+ textures: 
&[crate::TextureBinding], + entries: &[crate::BindGroupEntry], + acceleration_structures: &[&super::AccelerationStructure], + ) { + /// Helper for splitting off and initializing a given number of elements on a pre-allocated + /// stack, based on items returned from an [`ExactSizeIterator`]. Typically created from a + /// [`MaybeUninit`] slice (see [`Vec::spare_capacity_mut()`]). + /// The updated [`ExtensionStack`] of remaining uninitialized elements is returned, safely + /// representing that the initialized and remaining elements are two independent mutable + /// borrows. + struct ExtendStack<'a, T> { + remainder: &'a mut [MaybeUninit], + } + + impl<'a, T> ExtendStack<'a, T> { + fn from_vec_capacity(vec: &'a mut Vec) -> Self { + Self { + remainder: vec.spare_capacity_mut(), + } + } + + fn extend_one(self, value: T) -> (Self, &'a mut T) { + let (to_init, remainder) = self.remainder.split_first_mut().unwrap(); + let init = to_init.write(value); + (Self { remainder }, init) + } + + fn extend( + self, + iter: impl IntoIterator + ExactSizeIterator, + ) -> (Self, &'a mut [T]) { + let (to_init, remainder) = self.remainder.split_at_mut(iter.len()); + + for (value, to_init) in iter.into_iter().zip(to_init.iter_mut()) { + to_init.write(value); + } + + // we can't use the safe (yet unstable) MaybeUninit::write_slice() here because of having an iterator to write + + let init = { + // SAFETY: The loop above has initialized exactly as many items as to_init is + // long, so it is safe to cast away the MaybeUninit wrapper into T. + + // Additional safety docs from unstable slice_assume_init_mut + // SAFETY: similar to safety notes for `slice_get_ref`, but we have a + // mutable reference which is also guaranteed to be valid for writes. + unsafe { std::mem::transmute::<&mut [MaybeUninit], &mut [T]>(to_init) } + }; + (Self { remainder }, init) + } + } + + let mut writes = Vec::with_capacity(entries.len()); + let mut buffer_infos = Vec::with_capacity(buffers.len()); + let mut buffer_infos = ExtendStack::from_vec_capacity(&mut buffer_infos); + let mut image_infos = Vec::with_capacity(samplers.len() + textures.len()); + let mut image_infos = ExtendStack::from_vec_capacity(&mut image_infos); + // TODO: This length could be reduced to just the number of top-level acceleration + // structure bindings, where multiple consecutive TLAS bindings that are set via + // one `WriteDescriptorSet` count towards one "info" struct, not the total number of + // acceleration structure bindings to write: + let mut acceleration_structure_infos = Vec::with_capacity(acceleration_structures.len()); + let mut acceleration_structure_infos = + ExtendStack::from_vec_capacity(&mut acceleration_structure_infos); + let mut raw_acceleration_structures = Vec::with_capacity(acceleration_structures.len()); + let mut raw_acceleration_structures = + ExtendStack::from_vec_capacity(&mut raw_acceleration_structures); + for entry in entries { + let (ty, size) = layout.types[entry.binding as usize]; + if size == 0 { + continue; // empty slot + } + let offset = entry.array_element_offset.unwrap_or(0); + let mut write = vk::WriteDescriptorSet::default() + .dst_set(*set.raw()) + .dst_binding(entry.binding) + .descriptor_type(ty) + .dst_array_element(offset); + + write = match ty { + vk::DescriptorType::SAMPLER => { + let start = entry.resource_index; + let end = start + entry.count; + let local_image_infos; + (image_infos, local_image_infos) = + image_infos.extend(samplers[start as usize..end as usize].iter().map( + |sampler| 
vk::DescriptorImageInfo::default().sampler(sampler.raw), + )); + write.image_info(local_image_infos) + } + vk::DescriptorType::SAMPLED_IMAGE | vk::DescriptorType::STORAGE_IMAGE => { + let start = entry.resource_index; + let end = start + entry.count; + let local_image_infos; + (image_infos, local_image_infos) = + image_infos.extend(textures[start as usize..end as usize].iter().map( + |binding| { + let layout = conv::derive_image_layout( + binding.usage, + binding.view.attachment.view_format, + ); + vk::DescriptorImageInfo::default() + .image_view(binding.view.raw) + .image_layout(layout) + }, + )); + write.image_info(local_image_infos) + } + vk::DescriptorType::UNIFORM_BUFFER + | vk::DescriptorType::UNIFORM_BUFFER_DYNAMIC + | vk::DescriptorType::STORAGE_BUFFER + | vk::DescriptorType::STORAGE_BUFFER_DYNAMIC => { + let start = entry.resource_index; + let end = start + entry.count; + let local_buffer_infos; + (buffer_infos, local_buffer_infos) = buffer_infos.extend( + buffers[start as usize..end as usize].iter().map(|binding| { + vk::DescriptorBufferInfo::default() + .buffer(binding.buffer.raw) + .offset(binding.offset) + .range(binding.size.map_or(vk::WHOLE_SIZE, wgt::BufferSize::get)) + }), + ); + write.buffer_info(local_buffer_infos) + } + vk::DescriptorType::ACCELERATION_STRUCTURE_KHR => { + let start = entry.resource_index; + let end = start + entry.count; + + let local_raw_acceleration_structures; + ( + raw_acceleration_structures, + local_raw_acceleration_structures, + ) = raw_acceleration_structures.extend( + acceleration_structures[start as usize..end as usize] + .iter() + .map(|acceleration_structure| acceleration_structure.raw), + ); + + let local_acceleration_structure_infos; + ( + acceleration_structure_infos, + local_acceleration_structure_infos, + ) = acceleration_structure_infos.extend_one( + vk::WriteDescriptorSetAccelerationStructureKHR::default() + .acceleration_structures(local_raw_acceleration_structures), + ); + + write + .descriptor_count(entry.count) + .push_next(local_acceleration_structure_infos) + } + _ => unreachable!(), + }; + + writes.push(write); + } + + unsafe { self.shared.raw.update_descriptor_sets(&writes, &[]) }; + + self.counters.bind_groups.add(1); + } } impl crate::Device for super::Device { @@ -1461,6 +1631,9 @@ impl crate::Device for super::Device { let partially_bound = desc .flags .contains(crate::BindGroupLayoutFlags::PARTIALLY_BOUND); + let update_after_bind = desc + .flags + .contains(crate::BindGroupLayoutFlags::UPDATE_AFTER_BIND); let vk_info = if partially_bound { binding_flag_vec = desc @@ -1472,6 +1645,9 @@ impl crate::Device for super::Device { if partially_bound && entry.count.is_some() { flags |= vk::DescriptorBindingFlags::PARTIALLY_BOUND; } + if update_after_bind && entry.count.is_some() { + flags |= vk::DescriptorBindingFlags::UPDATE_AFTER_BIND; + } flags }) @@ -1611,170 +1787,42 @@ impl crate::Device for super::Device { unsafe { self.shared.set_object_name(*set.raw(), label) }; } - /// Helper for splitting off and initializing a given number of elements on a pre-allocated - /// stack, based on items returned from an [`ExactSizeIterator`]. Typically created from a - /// [`MaybeUninit`] slice (see [`Vec::spare_capacity_mut()`]). - /// The updated [`ExtensionStack`] of remaining uninitialized elements is returned, safely - /// representing that the initialized and remaining elements are two independent mutable - /// borrows. 
- struct ExtendStack<'a, T> { - remainder: &'a mut [MaybeUninit], - } - - impl<'a, T> ExtendStack<'a, T> { - fn from_vec_capacity(vec: &'a mut Vec) -> Self { - Self { - remainder: vec.spare_capacity_mut(), - } - } - - fn extend_one(self, value: T) -> (Self, &'a mut T) { - let (to_init, remainder) = self.remainder.split_first_mut().unwrap(); - let init = to_init.write(value); - (Self { remainder }, init) - } - - fn extend( - self, - iter: impl IntoIterator + ExactSizeIterator, - ) -> (Self, &'a mut [T]) { - let (to_init, remainder) = self.remainder.split_at_mut(iter.len()); - - for (value, to_init) in iter.into_iter().zip(to_init.iter_mut()) { - to_init.write(value); - } - - // we can't use the safe (yet unstable) MaybeUninit::write_slice() here because of having an iterator to write - - let init = { - // SAFETY: The loop above has initialized exactly as many items as to_init is - // long, so it is safe to cast away the MaybeUninit wrapper into T. - - // Additional safety docs from unstable slice_assume_init_mut - // SAFETY: similar to safety notes for `slice_get_ref`, but we have a - // mutable reference which is also guaranteed to be valid for writes. - unsafe { mem::transmute::<&mut [MaybeUninit], &mut [T]>(to_init) } - }; - (Self { remainder }, init) - } - } - - let mut writes = Vec::with_capacity(desc.entries.len()); - let mut buffer_infos = Vec::with_capacity(desc.buffers.len()); - let mut buffer_infos = ExtendStack::from_vec_capacity(&mut buffer_infos); - let mut image_infos = Vec::with_capacity(desc.samplers.len() + desc.textures.len()); - let mut image_infos = ExtendStack::from_vec_capacity(&mut image_infos); - // TODO: This length could be reduced to just the number of top-level acceleration - // structure bindings, where multiple consecutive TLAS bindings that are set via - // one `WriteDescriptorSet` count towards one "info" struct, not the total number of - // acceleration structure bindings to write: - let mut acceleration_structure_infos = - Vec::with_capacity(desc.acceleration_structures.len()); - let mut acceleration_structure_infos = - ExtendStack::from_vec_capacity(&mut acceleration_structure_infos); - let mut raw_acceleration_structures = - Vec::with_capacity(desc.acceleration_structures.len()); - let mut raw_acceleration_structures = - ExtendStack::from_vec_capacity(&mut raw_acceleration_structures); - for entry in desc.entries { - let (ty, size) = desc.layout.types[entry.binding as usize]; - if size == 0 { - continue; // empty slot - } - let mut write = vk::WriteDescriptorSet::default() - .dst_set(*set.raw()) - .dst_binding(entry.binding) - .descriptor_type(ty); - - write = match ty { - vk::DescriptorType::SAMPLER => { - let start = entry.resource_index; - let end = start + entry.count; - let local_image_infos; - (image_infos, local_image_infos) = - image_infos.extend(desc.samplers[start as usize..end as usize].iter().map( - |sampler| vk::DescriptorImageInfo::default().sampler(sampler.raw), - )); - write.image_info(local_image_infos) - } - vk::DescriptorType::SAMPLED_IMAGE | vk::DescriptorType::STORAGE_IMAGE => { - let start = entry.resource_index; - let end = start + entry.count; - let local_image_infos; - (image_infos, local_image_infos) = - image_infos.extend(desc.textures[start as usize..end as usize].iter().map( - |binding| { - let layout = conv::derive_image_layout( - binding.usage, - binding.view.attachment.view_format, - ); - vk::DescriptorImageInfo::default() - .image_view(binding.view.raw) - .image_layout(layout) - }, - )); - 
write.image_info(local_image_infos) - } - vk::DescriptorType::UNIFORM_BUFFER - | vk::DescriptorType::UNIFORM_BUFFER_DYNAMIC - | vk::DescriptorType::STORAGE_BUFFER - | vk::DescriptorType::STORAGE_BUFFER_DYNAMIC => { - let start = entry.resource_index; - let end = start + entry.count; - let local_buffer_infos; - (buffer_infos, local_buffer_infos) = - buffer_infos.extend(desc.buffers[start as usize..end as usize].iter().map( - |binding| { - vk::DescriptorBufferInfo::default() - .buffer(binding.buffer.raw) - .offset(binding.offset) - .range( - binding.size.map_or(vk::WHOLE_SIZE, wgt::BufferSize::get), - ) - }, - )); - write.buffer_info(local_buffer_infos) - } - vk::DescriptorType::ACCELERATION_STRUCTURE_KHR => { - let start = entry.resource_index; - let end = start + entry.count; - - let local_raw_acceleration_structures; - ( - raw_acceleration_structures, - local_raw_acceleration_structures, - ) = raw_acceleration_structures.extend( - desc.acceleration_structures[start as usize..end as usize] - .iter() - .map(|acceleration_structure| acceleration_structure.raw), - ); - - let local_acceleration_structure_infos; - ( - acceleration_structure_infos, - local_acceleration_structure_infos, - ) = acceleration_structure_infos.extend_one( - vk::WriteDescriptorSetAccelerationStructureKHR::default() - .acceleration_structures(local_raw_acceleration_structures), - ); - - write - .descriptor_count(entry.count) - .push_next(local_acceleration_structure_infos) - } - _ => unreachable!(), - }; - - writes.push(write); - } - - unsafe { self.shared.raw.update_descriptor_sets(&writes, &[]) }; - - self.counters.bind_groups.add(1); + self.write_descriptors( + &set, + desc.layout, + desc.buffers, + desc.samplers, + desc.textures, + desc.entries, + desc.acceleration_structures, + ); Ok(super::BindGroup { set }) } + unsafe fn update_bind_group( + &self, + bind_group: &::BindGroup, + desc: &crate::UpdateBindGroupDescriptor< + ::BindGroupLayout, + ::Buffer, + ::Sampler, + ::TextureView, + ::AccelerationStructure, + >, + ) -> Result<(), crate::DeviceError> { + self.write_descriptors( + &bind_group.set, + desc.layout, + desc.buffers, + desc.samplers, + desc.textures, + desc.entries, + desc.acceleration_structures, + ); + Ok(()) + } + unsafe fn destroy_bind_group(&self, group: super::BindGroup) { unsafe { self.desc_allocator
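To make the new surface area easier to review, here is a minimal sketch of how the pieces introduced above fit together, distilled from the halmark changes: a bind group is created with `BindGroupFlags::ALLOW_UPDATES` and later re-pointed at a different texture view with `update_bind_group`. The helper name, the choice of binding slot, and the assumption that binding 1 is a sampled texture are illustrative only and not part of this patch; the layout is assumed to carry `UPDATE_AFTER_BIND`, and synchronizing updates with in-flight GPU work is left to the caller (the halmark example double-buffers its global bind groups for exactly this reason).

```rust
use wgpu_hal as hal;

/// Create a texture bind group that can later be re-pointed at another view,
/// then update it in place. Binding 1 is assumed to be a sampled texture, as
/// in the halmark example above.
unsafe fn create_then_retarget<A: hal::Api>(
    device: &A::Device,
    layout: &A::BindGroupLayout,
    first_view: &A::TextureView,
    second_view: &A::TextureView,
) -> Result<A::BindGroup, hal::DeviceError> {
    use hal::Device as _;

    let entries = [hal::BindGroupEntry {
        binding: 1,
        resource_index: 0,
        count: 1,
        // `None` starts at array element 0; `Some(n)` additionally requires
        // `PARTIALLY_BOUND` on the layout.
        array_element_offset: None,
    }];

    // `ALLOW_UPDATES` opts this bind group into `update_bind_group`.
    let group = unsafe {
        device.create_bind_group(&hal::BindGroupDescriptor {
            label: Some("mutable group"),
            flags: hal::BindGroupFlags::ALLOW_UPDATES,
            layout,
            buffers: &[],
            samplers: &[],
            textures: &[hal::TextureBinding {
                view: first_view,
                usage: hal::TextureUses::RESOURCE,
            }],
            entries: &entries,
            acceleration_structures: &[],
        })?
    };

    // Later, once no in-flight work reads the old descriptor, the same slot can
    // be rewritten instead of rebuilding the whole group.
    unsafe {
        device.update_bind_group(
            &group,
            &hal::UpdateBindGroupDescriptor {
                layout,
                entries: &entries,
                buffers: &[],
                samplers: &[],
                textures: &[hal::TextureBinding {
                    view: second_view,
                    usage: hal::TextureUses::RESOURCE,
                }],
                acceleration_structures: &[],
            },
        )?;
    }

    Ok(group)
}
```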
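For the bindless case that `array_element_offset` is meant to serve, a partially bound layout lets an update touch only a sub-range of a large binding array. The sketch below is likewise illustrative rather than part of this patch (the helper and its parameters are assumptions); it presumes the layout entry at `binding` is a texture array whose `count` is at least `first + views.len()` and that the layout carries both `PARTIALLY_BOUND` and `UPDATE_AFTER_BIND`.

```rust
use wgpu_hal as hal;

/// Overwrite elements [first, first + views.len()) of the texture array at
/// `binding` in an existing bind group, leaving every other element untouched.
unsafe fn write_texture_array_slice<A: hal::Api>(
    device: &A::Device,
    group: &A::BindGroup,
    layout: &A::BindGroupLayout,
    binding: u32,
    first: u32,
    views: &[&A::TextureView],
) -> Result<(), hal::DeviceError> {
    use hal::Device as _;

    // One TextureBinding per view that should land in the array.
    let bindings: Vec<_> = views
        .iter()
        .copied()
        .map(|view| hal::TextureBinding {
            view,
            usage: hal::TextureUses::RESOURCE,
        })
        .collect();

    unsafe {
        device.update_bind_group(
            group,
            &hal::UpdateBindGroupDescriptor {
                layout,
                entries: &[hal::BindGroupEntry {
                    binding,
                    // Consume `bindings[0..views.len()]` from `textures` below.
                    resource_index: 0,
                    count: views.len() as u32,
                    // First array element of the binding to write.
                    array_element_offset: Some(first),
                }],
                buffers: &[],
                samplers: &[],
                textures: &bindings,
                acceleration_structures: &[],
            },
        )
    }
}
```

On DX12 this path maps to the new `multi_update` helper, which writes one GPU descriptor range per entry rather than copying the whole layout's range, so untouched elements keep whatever was previously written.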