Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wgpu-core] Call flush_mapped_ranges when unmapping write-mapped buffers #6089

Merged
merged 5 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions wgpu-core/src/device/global.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2418,7 +2418,9 @@ impl Global {
Ok((ptr, range_size))
}
resource::BufferMapState::Active {
ref ptr, ref range, ..
ref mapping,
ref range,
..
} => {
if offset < range.start {
return Err(BufferAccessError::OutOfBoundsUnderrun {
Expand All @@ -2437,7 +2439,7 @@ impl Global {
let relative_offset = (offset - range.start) as isize;
unsafe {
Ok((
NonNull::new_unchecked(ptr.as_ptr().offset(relative_offset)),
NonNull::new_unchecked(mapping.ptr.as_ptr().offset(relative_offset)),
range_size,
))
}
Expand Down
11 changes: 7 additions & 4 deletions wgpu-core/src/device/life.rs
Original file line number Diff line number Diff line change
Expand Up @@ -391,10 +391,10 @@ impl<A: HalApi> LifetimeTracker<A> {
host,
snatch_guard,
) {
Ok(ptr) => {
Ok(mapping) => {
*buffer.map_state.lock() = resource::BufferMapState::Active {
ptr,
range: pending_mapping.range.start..pending_mapping.range.start + size,
mapping,
range: pending_mapping.range.clone(),
host,
};
Ok(())
Expand All @@ -406,7 +406,10 @@ impl<A: HalApi> LifetimeTracker<A> {
}
} else {
*buffer.map_state.lock() = resource::BufferMapState::Active {
ptr: std::ptr::NonNull::dangling(),
mapping: hal::BufferMapping {
ptr: std::ptr::NonNull::dangling(),
is_coherent: true,
},
range: pending_mapping.range,
host: pending_mapping.op.host,
};
Expand Down
22 changes: 8 additions & 14 deletions wgpu-core/src/device/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use std::os::raw::c_char;
use thiserror::Error;
use wgt::{BufferAddress, DeviceLostReason, TextureFormat};

use std::{iter, num::NonZeroU32, ptr};
use std::{iter, num::NonZeroU32};

pub mod any_device;
pub(crate) mod bgl;
Expand Down Expand Up @@ -307,21 +307,18 @@ fn map_buffer<A: HalApi>(
size: BufferAddress,
kind: HostMap,
snatch_guard: &SnatchGuard,
) -> Result<ptr::NonNull<u8>, BufferAccessError> {
) -> Result<hal::BufferMapping, BufferAccessError> {
let raw_buffer = buffer.try_raw(snatch_guard)?;
let mapping = unsafe {
raw.map_buffer(raw_buffer, offset..offset + size)
.map_err(DeviceError::from)?
};

*buffer.sync_mapped_writes.lock() = match kind {
HostMap::Read if !mapping.is_coherent => unsafe {
if !mapping.is_coherent && kind == HostMap::Read {
unsafe {
raw.invalidate_mapped_ranges(raw_buffer, iter::once(offset..offset + size));
None
},
HostMap::Write if !mapping.is_coherent => Some(offset..offset + size),
_ => None,
};
}
}

assert_eq!(offset % wgt::COPY_BUFFER_ALIGNMENT, 0);
assert_eq!(size % wgt::COPY_BUFFER_ALIGNMENT, 0);
Expand All @@ -339,9 +336,6 @@ fn map_buffer<A: HalApi>(
// If this is a write mapping, zeroing out the memory here is the only
// reasonable way, as all data is pushed to the GPU anyway.

// No need to flush if it is flushed later anyways.
let zero_init_needs_flush_now =
mapping.is_coherent && buffer.sync_mapped_writes.lock().is_none();
let mapped = unsafe { std::slice::from_raw_parts_mut(mapping.ptr.as_ptr(), size as usize) };

for uninitialized in buffer
Expand All @@ -355,12 +349,12 @@ fn map_buffer<A: HalApi>(
(uninitialized.start - offset) as usize..(uninitialized.end - offset) as usize;
mapped[fill_range].fill(0);

if zero_init_needs_flush_now {
if !mapping.is_coherent && kind == HostMap::Read {
unsafe { raw.flush_mapped_ranges(raw_buffer, iter::once(uninitialized)) };
}
}

Ok(mapping.ptr)
Ok(mapping)
}

#[derive(Clone, Debug)]
Expand Down
11 changes: 6 additions & 5 deletions wgpu-core/src/device/resource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -597,7 +597,6 @@ impl<A: HalApi> Device<A> {
rank::BUFFER_INITIALIZATION_STATUS,
BufferInitTracker::new(aligned_size),
),
sync_mapped_writes: Mutex::new(rank::BUFFER_SYNC_MAPPED_WRITES, None),
map_state: Mutex::new(rank::BUFFER_MAP_STATE, resource::BufferMapState::Idle),
label: desc.label.to_string(),
tracking_data: TrackingData::new(self.tracker_indices.buffers.clone()),
Expand All @@ -611,8 +610,11 @@ impl<A: HalApi> Device<A> {
} else if desc.usage.contains(wgt::BufferUsages::MAP_WRITE) {
// buffer is mappable, so we are just doing that at start
let map_size = buffer.size;
let ptr = if map_size == 0 {
std::ptr::NonNull::dangling()
let mapping = if map_size == 0 {
hal::BufferMapping {
ptr: std::ptr::NonNull::dangling(),
is_coherent: true,
}
} else {
let snatch_guard: SnatchGuard = self.snatchable_lock.read();
map_buffer(
Expand All @@ -625,7 +627,7 @@ impl<A: HalApi> Device<A> {
)?
};
*buffer.map_state.lock() = resource::BufferMapState::Active {
ptr,
mapping,
range: 0..map_size,
host: HostMap::Write,
};
Expand Down Expand Up @@ -694,7 +696,6 @@ impl<A: HalApi> Device<A> {
rank::BUFFER_INITIALIZATION_STATUS,
BufferInitTracker::new(0),
),
sync_mapped_writes: Mutex::new(rank::BUFFER_SYNC_MAPPED_WRITES, None),
map_state: Mutex::new(rank::BUFFER_MAP_STATE, resource::BufferMapState::Idle),
label: desc.label.to_string(),
tracking_data: TrackingData::new(self.tracker_indices.buffers.clone()),
Expand Down
1 change: 0 additions & 1 deletion wgpu-core/src/lock/rank.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ define_lock_ranks! {

rank BUFFER_BIND_GROUPS "Buffer::bind_groups" followed by { }
rank BUFFER_INITIALIZATION_STATUS "Buffer::initialization_status" followed by { }
rank BUFFER_SYNC_MAPPED_WRITES "Buffer::sync_mapped_writes" followed by { }
rank DEVICE_DEFERRED_DESTROY "Device::deferred_destroy" followed by { }
rank DEVICE_FENCE "Device::fence" followed by { }
#[allow(dead_code)]
Expand Down
16 changes: 11 additions & 5 deletions wgpu-core/src/resource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ pub(crate) enum BufferMapState<A: HalApi> {
Waiting(BufferPendingMapping<A>),
/// Mapped
Active {
ptr: NonNull<u8>,
mapping: hal::BufferMapping,
range: hal::MemoryRange,
host: HostMap,
},
Expand Down Expand Up @@ -431,7 +431,6 @@ pub struct Buffer<A: HalApi> {
pub(crate) usage: wgt::BufferUsages,
pub(crate) size: wgt::BufferAddress,
pub(crate) initialization_status: RwLock<BufferInitTracker>,
pub(crate) sync_mapped_writes: Mutex<Option<hal::MemoryRange>>,
/// The `label` from the descriptor used to create the resource.
pub(crate) label: String,
pub(crate) tracking_data: TrackingData,
Expand Down Expand Up @@ -669,13 +668,18 @@ impl<A: HalApi> Buffer<A> {
BufferMapState::Waiting(pending) => {
return Ok(Some((pending.op, Err(BufferAccessError::MapAborted))));
}
BufferMapState::Active { ptr, range, host } => {
BufferMapState::Active {
mapping,
range,
host,
} => {
#[allow(clippy::collapsible_if)]
if host == HostMap::Write {
#[cfg(feature = "trace")]
if let Some(ref mut trace) = *device.trace.lock() {
let size = range.end - range.start;
let data = trace.make_binary("bin", unsafe {
std::slice::from_raw_parts(ptr.as_ptr(), size as usize)
std::slice::from_raw_parts(mapping.ptr.as_ptr(), size as usize)
});
trace.add(trace::Action::WriteBuffer {
id: buffer_id,
Expand All @@ -684,7 +688,9 @@ impl<A: HalApi> Buffer<A> {
queued: false,
});
}
let _ = (ptr, range);
if !mapping.is_coherent {
unsafe { device.raw().flush_mapped_ranges(raw_buf, iter::once(range)) };
}
}
unsafe { device.raw().unmap_buffer(raw_buf) };
}
Expand Down
28 changes: 18 additions & 10 deletions wgpu-hal/src/gles/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,7 @@ impl crate::Device for super::Device {
size: desc.size,
map_flags: 0,
data: Some(Arc::new(Mutex::new(vec![0; desc.size as usize]))),
offset_of_current_mapping: Arc::new(Mutex::new(0)),
});
}

Expand Down Expand Up @@ -635,6 +636,7 @@ impl crate::Device for super::Device {
size: desc.size,
map_flags,
data,
offset_of_current_mapping: Arc::new(Mutex::new(0)),
})
}

Expand Down Expand Up @@ -668,6 +670,7 @@ impl crate::Device for super::Device {
unsafe { self.shared.get_buffer_sub_data(gl, buffer.target, 0, slice) };
slice.as_mut_ptr()
} else {
*buffer.offset_of_current_mapping.lock().unwrap() = range.start;
unsafe {
gl.map_buffer_range(
buffer.target,
Expand All @@ -693,6 +696,7 @@ impl crate::Device for super::Device {
unsafe { gl.bind_buffer(buffer.target, Some(raw)) };
unsafe { gl.unmap_buffer(buffer.target) };
unsafe { gl.bind_buffer(buffer.target, None) };
*buffer.offset_of_current_mapping.lock().unwrap() = 0;
}
}
}
Expand All @@ -701,16 +705,20 @@ impl crate::Device for super::Device {
I: Iterator<Item = crate::MemoryRange>,
{
if let Some(raw) = buffer.raw {
let gl = &self.shared.context.lock();
unsafe { gl.bind_buffer(buffer.target, Some(raw)) };
for range in ranges {
unsafe {
gl.flush_mapped_buffer_range(
buffer.target,
range.start as i32,
(range.end - range.start) as i32,
)
};
if buffer.data.is_none() {
let gl = &self.shared.context.lock();
unsafe { gl.bind_buffer(buffer.target, Some(raw)) };
for range in ranges {
let offset_of_current_mapping =
*buffer.offset_of_current_mapping.lock().unwrap();
unsafe {
gl.flush_mapped_buffer_range(
buffer.target,
(range.start - offset_of_current_mapping) as i32,
(range.end - range.start) as i32,
)
};
}
}
}
}
Expand Down
1 change: 1 addition & 0 deletions wgpu-hal/src/gles/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ pub struct Buffer {
size: wgt::BufferAddress,
map_flags: u32,
data: Option<Arc<std::sync::Mutex<Vec<u8>>>>,
offset_of_current_mapping: Arc<std::sync::Mutex<wgt::BufferAddress>>,
}

#[cfg(send_sync)]
Expand Down
Loading