From d5393a7c9d3a63d8b036935110511c22fb462181 Mon Sep 17 00:00:00 2001
From: Mara Bos
Date: Sun, 14 Aug 2022 19:23:18 +0200
Subject: [PATCH 1/9] Add atomic memcpy RFC.

---
 text/0000-atomic-memcpy.md | 227 +++++++++++++++++++++++++++++++++++++
 1 file changed, 227 insertions(+)
 create mode 100644 text/0000-atomic-memcpy.md

diff --git a/text/0000-atomic-memcpy.md b/text/0000-atomic-memcpy.md
new file mode 100644
index 00000000000..1a8893c4570
--- /dev/null
+++ b/text/0000-atomic-memcpy.md
@@ -0,0 +1,227 @@
+- Feature Name: `atomic_memcpy`
+- Start Date: 2022-08-14
+- RFC PR: [rust-lang/rfcs#0000](https://github.com/rust-lang/rfcs/pull/0000)
+- Rust Issue: [rust-lang/rust#0000](https://github.com/rust-lang/rust/issues/0000)
+
+# Summary
+
+This is a proposal to add `AtomicPerByte<T>`, to represent _tearable atomics_.
+This makes it possible to properly implement a _sequence lock_ in Rust.
+
+# The Problem
+
+It's currently not possible to implement an efficient and perfectly
+(theoretically) correct sequence lock in Rust.
+
+Unlike most locking mechanisms, a sequence lock doesn't prevent a race
+to access the data it protects.
+Instead, it detects a race only after the load operation has already happened,
+and retries it if the load operation raced with a write operation.
+
+A sequence lock in Rust looks something like this:
+
+```rust
+// Incomplete example
+
+pub struct SeqLock<T> {
+    seq: AtomicUsize,
+    data: UnsafeCell<T>,
+}
+
+unsafe impl<T> Sync for SeqLock<T> {}
+
+impl<T: Copy> SeqLock<T> {
+    /// Safety: Only call from one thread.
+    pub unsafe fn write(&self, value: T) {
+        self.seq.fetch_add(1, Relaxed); // Make the counter odd: a write is in progress.
+        write_data(&self.data, value, Release);
+        self.seq.fetch_add(1, Release); // Make the counter even again.
+    }
+
+    pub fn read(&self) -> T {
+        loop {
+            let s1 = self.seq.load(Acquire);
+            let data = read_data(&self.data, Acquire);
+            let s2 = self.seq.load(Relaxed);
+            // Retry if the counter was odd or changed: the load raced with a write.
+            if s1 & 1 == 0 && s1 == s2 {
+                return unsafe { assume_valid(data) };
+            }
+        }
+    }
+}
+```
+
+The `write_data` and `read_data` calls can happen concurrently.
+The `write` method increments the counter before and after,
+such that the counter is odd during `write_data`.
+The `read` function repeats `read_data` until the counter
+is even and identical both before and after the read.
+This way, `assume_valid` is only ever called on data that was
+not the result of a race.
+
+A big question is how to implement `write_data`, `read_data`, and `assume_valid`
+in Rust in an efficient way while satisfying the memory model.
+
+The somewhat popular `seqlock` crate and similar implementations found in the ecosystem
+all use a regular non-atomic write (preceded by an atomic fence) for writing,
+and `ptr::read_volatile` (followed by an atomic fence) for reading.
+This "works" "fine", but is technically undefined behavior.
+
+The C++ and Rust memory model don't allow for data races,
+so don't allow for a data race to be detected after the fact;
+that's too late.
+
+All of the data would have to be written and read through
+atomic operations to prevent such a data race.
+We don't need the atomicity of the data as a whole though;
+it's fine if there's tearing, since we re-start on a race anyway.
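+
+To make that last point concrete, here is a sketch (illustration only, not part
+of this proposal) of a `read_data` built from per-byte atomic loads, assuming
+the writer also only touches the data through `AtomicU8` stores:
+
+```rust
+use std::cell::UnsafeCell;
+use std::mem::{size_of, MaybeUninit};
+use std::sync::atomic::{fence, AtomicU8, Ordering};
+
+// A series of relaxed per-byte atomic loads followed by a fence with the
+// requested (acquire) ordering. The result may be torn, hence `MaybeUninit<T>`.
+fn read_data<T>(data: &UnsafeCell<T>, ordering: Ordering) -> MaybeUninit<T> {
+    let mut out = MaybeUninit::<T>::uninit();
+    for i in 0..size_of::<T>() {
+        unsafe {
+            // View the shared storage as individual atomic bytes.
+            let byte = &*(data.get() as *const AtomicU8).add(i);
+            (out.as_mut_ptr() as *mut u8).add(i).write(byte.load(Ordering::Relaxed));
+        }
+    }
+    fence(ordering);
+    out
+}
+
+// `write_data` would be the mirror image: a fence with the requested (release)
+// ordering, followed by relaxed per-byte atomic stores.
+```
+
+This avoids the data race, but is slow; the point of this proposal is to let
+the compiler and standard library provide the same semantics efficiently.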
+
+# The C++ Solution
+
+C++'s [P1478] proposes the addition of these two functions to the C++ standard
+library to solve this problem:
+
+```cpp
+void atomic_load_per_byte_memcpy(void *dest, void *source, size_t, memory_order);
+void atomic_store_per_byte_memcpy(void *dest, void *source, size_t, memory_order);
+```
+
+The first one is effectively a series of `AtomicU8::load`s followed by a memory fence,
+while the second one is basically a memory fence followed by a series of `AtomicU8::store`s.
+Except the implementation can be much more efficient.
+The implementation is allowed to load/store the bytes in any order,
+and doesn't have to operate on individual bytes.
+
+The memory order can only be relaxed, acquire (for load), or release (for store).
+Sequentially consistent ordering for these operations is disallowed,
+since it's not obvious what that means for these tearable operations.
+
+# The Rust Solution
+
+While C++'s solution could easily be copied into Rust with a nearly identical signature,
+it wouldn't fit with the rest of our atomic APIs.
+
+All our atomic operations happen through the `Atomic*` types,
+and we don't have atomic operations that operate on raw pointers.
+(Other than as unstable intrinsics.)
+
+Adding this functionality as a variant of the (non-atomic) `ptr::copy_nonoverlapping`,
+similar to the C++ solution, would not be very ergonomic
+and can easily result in subtle bugs causing undefined behavior.
+
+Instead, I propose to add an `AtomicPerByte<T>` type
+similar to our existing atomic types: a `Sync` storage for a `T`
+that can be written to and read from by multiple threads concurrently.
+
+The `SeqLock` implementation above would use this type instead of an `UnsafeCell<T>`.
+It'd no longer need an unsafe `Sync` implementation,
+since the `AtomicPerByte<T>` type can be shared between threads safely.
+
+This type has a (safe!) `store` method consuming a `T`,
+and a (safe!) `load` method producing a `MaybeUninit<T>`.
+The `MaybeUninit<T>` type is used to represent the potentially invalid state
+the data might be in, since it might be the result of tearing during a race.
+
+Only after confirming that there was no race and the data is valid
+can one safely use `MaybeUninit::assert_init` to get the actual `T` out.
+
+# Full API Overview
+
+The `AtomicPerByte<T>` type can be thought of as
+the `Sync` (data race free) equivalent of `MaybeUninit<T>`.
+It can contain a `T`, but it might be invalid in various ways
+due to concurrent store operations.
+Its interface resembles a mix of the interfaces of `MaybeUninit<T>` and the atomic types.
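+
+For illustration, a brief sketch of the intended usage pattern before the full
+listing; `detect_race` here is a hypothetical stand-in for whatever race
+detection the surrounding data structure provides (such as the sequence
+number in the example above):
+
+```rust
+fn try_read<T>(shared: &AtomicPerByte<T>) -> Option<T> {
+    // Safe: a potentially torn result is only ever handed out as `MaybeUninit<T>`.
+    let data: MaybeUninit<T> = shared.load(Ordering::Acquire);
+    if detect_race() {
+        // The possibly-torn bytes are discarded without being inspected.
+        None
+    } else {
+        // Sound only because we verified that no store raced with our load.
+        Some(unsafe { data.assume_init() })
+    }
+}
+```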
+
+```rust
+#[repr(transparent)]
+pub struct AtomicPerByte<T> {
+    inner: UnsafeCell<MaybeUninit<T>>,
+}
+
+unsafe impl<T> Sync for AtomicPerByte<T> {}
+
+impl<T> AtomicPerByte<T> {
+    pub const fn new(value: T) -> Self;
+    pub const fn uninit() -> Self;
+
+    pub fn load(&self, ordering: Ordering) -> MaybeUninit<T>;
+    pub fn store(&self, value: T, ordering: Ordering);
+
+    pub fn load_to(&self, dest: &mut MaybeUninit<T>, ordering: Ordering);
+    pub fn store_from(&self, src: &MaybeUninit<T>, ordering: Ordering);
+
+    pub fn load_to_slice(this: &[Self], dest: &mut [MaybeUninit<T>], ordering: Ordering);
+    pub fn store_from_slice(this: &[Self], src: &[MaybeUninit<T>], ordering: Ordering);
+
+    pub const fn into_inner(self) -> MaybeUninit<T>;
+
+    pub const fn as_ptr(&self) -> *const T;
+    pub const fn as_mut_ptr(&self) -> *mut T;
+
+    pub const fn get_mut(&mut self) -> &mut MaybeUninit<T>;
+    pub const fn get_mut_slice(this: &mut [Self]) -> &mut [MaybeUninit<T>];
+
+    pub const fn from_mut(value: &mut MaybeUninit<T>) -> &mut Self;
+    pub const fn from_mut_slice(slice: &mut [MaybeUninit<T>]) -> &mut [Self];
+}
+
+impl<T> Debug for AtomicPerByte<T>;
+impl<T> From<T> for AtomicPerByte<T>;
+impl<T> From<MaybeUninit<T>> for AtomicPerByte<T>;
+```
+
+Note how the entire interface is safe.
+All potential unsafety is captured by using `MaybeUninit`.
+
+The load functions panic if the `ordering` is not `Relaxed` or `Acquire`.
+The store functions panic if the `ordering` is not `Relaxed` or `Release`.
+The slice functions panic if the slices are not of the same length.
+
+# Drawbacks
+
+- In order for this to be efficient, we need an additional intrinsic hooking into
+  special support in LLVM. (Which LLVM needs to have anyway for C++.)
+
+- It's not immediately obvious this type behaves like a `MaybeUninit<T>`,
+  making it easy to forget to manually drop any values that implement `Drop`.
+
+  This could be solved by requiring `T: Copy`, or by using a better name for this type.
+
+- `MaybeUninit` today isn't as ergonomic as it should be.
+
+  For a simple `Copy` type like `u8` it might be nicer to be able to use types like `&[u8]`
+  rather than `&[MaybeUninit<u8>]`, etc.
+  (But that's a larger problem affecting many other things, like `MaybeUninit`'s interface,
+  `Read::read_buf`, etc. Maybe this should be solved separately.)
+
+# Alternatives
+
+- Instead of a type, this could all be just two functions on raw pointers,
+  such as something like `std::ptr::copy_nonoverlapping_load_atomic_per_byte`.
+
+  This means having to use `UnsafeCell` and more unsafe code wherever this functionality is used.
+
+  It'd be inconsistent with the other atomic operations.
+  We don't have e.g. `std::ptr::load_atomic` that operates on pointers either.
+
+- Require `T: Copy` for `AtomicPerByte<T>`, such that we don't need to worry about
+  duplicating non-`Copy` data.
+
+  There might be valid use cases with non-`Copy` data.
+  Also, not all "memcpy'able" data is always marked as `Copy` (e.g. to prevent implicit copies).
+
+- Leave this to the ecosystem, outside of the standard library.
+
+  Since this requires special compiler support, a community crate would
+  have to use (probably technically unsound) hacks like volatile operations
+  or inline assembly.
+
+# Unresolved questions
+
+- Should we require `T: Copy`?
+
+  There might be some valid use cases for non-`Copy` data,
+  but it's easy to accidentally cause undefined behavior by using `load`
+  to make an extra copy of data that shouldn't be copied,
+  as the sketch after this list shows.
+
+- Naming: `AtomicPerByte`? `TearableAtomic`? `NoDataRace`? `NotQuiteAtomic`?
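+
+A sketch of that footgun, assuming the proposed API without a `T: Copy` bound
+(hypothetical code, only to illustrate the first question above):
+
+```rust
+fn double_free_footgun(a: &AtomicPerByte<String>) {
+    // Both loads are safe: each merely copies the bytes into a
+    // `MaybeUninit<String>` without asserting that they are valid.
+    let x = a.load(Ordering::Relaxed);
+    let y = a.load(Ordering::Relaxed);
+
+    // Unsound: two `String`s now own the same heap allocation (as does the
+    // value still stored in `a`), so dropping both causes a double free.
+    let (x, y) = unsafe { (x.assume_init(), y.assume_init()) };
+    drop(x);
+    drop(y); // double free
+}
+```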
+
+[P1478]: https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p1478r7.html

From e864d8dcd14e7746a6f18a8af02b2188fb8047ba Mon Sep 17 00:00:00 2001
From: Mara Bos
Date: Sun, 14 Aug 2022 19:40:32 +0200
Subject: [PATCH 2/9] Add number in atomic memcpy rfc.

---
 text/{0000-atomic-memcpy.md => 3301-atomic-memcpy.md} | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename text/{0000-atomic-memcpy.md => 3301-atomic-memcpy.md} (99%)

diff --git a/text/0000-atomic-memcpy.md b/text/3301-atomic-memcpy.md
similarity index 99%
rename from text/0000-atomic-memcpy.md
rename to text/3301-atomic-memcpy.md
index 1a8893c4570..216c09a3dd3 100644
--- a/text/0000-atomic-memcpy.md
+++ b/text/3301-atomic-memcpy.md
@@ -1,6 +1,6 @@
 - Feature Name: `atomic_memcpy`
 - Start Date: 2022-08-14
-- RFC PR: [rust-lang/rfcs#0000](https://github.com/rust-lang/rfcs/pull/0000)
+- RFC PR: [rust-lang/rfcs#3301](https://github.com/rust-lang/rfcs/pull/3301)
 - Rust Issue: [rust-lang/rust#0000](https://github.com/rust-lang/rust/issues/0000)
 
 # Summary

From d12abe9a9fd72b4d4c958ba54f2d220278ba3543 Mon Sep 17 00:00:00 2001
From: Mara Bos
Date: Mon, 15 Aug 2022 10:50:15 +0200
Subject: [PATCH 3/9] Fix typo.

---
 text/3301-atomic-memcpy.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/text/3301-atomic-memcpy.md b/text/3301-atomic-memcpy.md
index 216c09a3dd3..1ba9f12e1d5 100644
--- a/text/3301-atomic-memcpy.md
+++ b/text/3301-atomic-memcpy.md
@@ -122,7 +122,7 @@ The `MaybeUninit<T>` type is used to represent the potentially invalid state
 the data might be in, since it might be the result of tearing during a race.
 
 Only after confirming that there was no race and the data is valid
-can one safely use `MaybeUninit::assert_init` to get the actual `T` out.
+can one safely use `MaybeUninit::assume_init` to get the actual `T` out.
 
 # Full API Overview

From eb68c3a6996be0168f44cdb08a0e1b4873e60dde Mon Sep 17 00:00:00 2001
From: Mara Bos
Date: Mon, 15 Aug 2022 13:18:50 +0200
Subject: [PATCH 4/9] Fix types of C++ API.

---
 text/3301-atomic-memcpy.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/text/3301-atomic-memcpy.md b/text/3301-atomic-memcpy.md
index 1ba9f12e1d5..537062e2c57 100644
--- a/text/3301-atomic-memcpy.md
+++ b/text/3301-atomic-memcpy.md
@@ -82,8 +82,8 @@ C++'s [P1478] proposes the addition of these two functions to the C++ standard
 library to solve this problem:
 
 ```cpp
-void atomic_load_per_byte_memcpy(void *dest, void *source, size_t, memory_order);
-void atomic_store_per_byte_memcpy(void *dest, void *source, size_t, memory_order);
+void *atomic_load_per_byte_memcpy(void *dest, const void *source, size_t, memory_order);
+void *atomic_store_per_byte_memcpy(void *dest, const void *source, size_t, memory_order);
 ```

From e802133610a1ae95fe40ff89000b8faed81d9c5a Mon Sep 17 00:00:00 2001
From: Mara Bos
Date: Mon, 15 Aug 2022 13:26:08 +0200
Subject: [PATCH 5/9] Better wording.

---
 text/3301-atomic-memcpy.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/text/3301-atomic-memcpy.md b/text/3301-atomic-memcpy.md
index 537062e2c57..62e0f1ec22e 100644
--- a/text/3301-atomic-memcpy.md
+++ b/text/3301-atomic-memcpy.md
@@ -211,8 +211,8 @@ The slice functions panic if the slices are not of the same length.
 - Leave this to the ecosystem, outside of the standard library.
 
   Since this requires special compiler support, a community crate would
-  have to use (probably technically unsound) hacks like volatile operations
-  or inline assembly.
+  have to use (platform specific) inline assembly
+  or (probably technically unsound) hacks like volatile operations.

From 69fc8b5385a45d558978b85153c0da520ff6e470 Mon Sep 17 00:00:00 2001
From: Mara Bos
Date: Tue, 2 Jul 2024 17:00:32 +0200
Subject: [PATCH 6/9] Update.

---
 text/3301-atomic-memcpy.md | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/text/3301-atomic-memcpy.md b/text/3301-atomic-memcpy.md
index 62e0f1ec22e..4045f1b3a43 100644
--- a/text/3301-atomic-memcpy.md
+++ b/text/3301-atomic-memcpy.md
@@ -67,15 +67,17 @@ all use a regular non-atomic write (preceded by an atomic fence) for writing,
 and `ptr::read_volatile` (followed by an atomic fence) for reading.
 This "works" "fine", but is technically undefined behavior.
 
-The C++ and Rust memory model don't allow for data races,
-so don't allow for a data race to be detected after the fact;
+The C++ and Rust memory model doesn't allow for data races,
+so doesn't allow for a data race to be detected after the fact;
 that's too late.
 
 All of the data would have to be written and read through
-atomic operations to prevent such a data race.
+atomic operations to prevent a data race.
 We don't need the atomicity of the data as a whole though;
 it's fine if there's tearing, since we re-start on a race anyway.
 
+Additionally, memory fences technically only "interact" with atomic operations, not with volatile operations.
+
 # The C++ Solution
@@ -205,7 +207,7 @@ The slice functions panic if the slices are not of the same length.
 - Require `T: Copy` for `AtomicPerByte<T>`, such that we don't need to worry about
   duplicating non-`Copy` data.
 
-  There might be valid use cases with non-`Copy` data.
+  There are valid use cases with non-`Copy` data, though, such as [in crossbeam-deque](https://github.com/crossbeam-rs/crossbeam/blob/2d9e7e0f81d3dd3efb1975b6379ea8b05fcf9bdd/crossbeam-deque/src/deque.rs#L60-L78).
   Also, not all "memcpy'able" data is always marked as `Copy` (e.g. to prevent implicit copies).
 
 - Leave this to the ecosystem, outside of the standard library.
@@ -214,6 +216,20 @@ The slice functions panic if the slices are not of the same length.
   have to use (platform specific) inline assembly
   or (probably technically unsound) hacks like volatile operations.
 
+- Use a new `MaybeTorn<T>` instead of `MaybeUninit<T>`.
+
+  `AtomicPerByte<T>` doesn't _have_ to support uninitialized bytes,
+  but it does need a wrapper type to represent potentially torn values.
+
+  If Rust had a `MaybeTorn<T>`, we could make it possible to load types like `[bool; _]` or even `f32` without any unsafe code,
+  since, for those types, combining bytes from different values always results in a valid value.
+
+  However, the use cases for this are very limited, it would require a new trait to mark the types for which this is valid,
+  and it makes the API a lot more complicated or verbose to use.
+
+  Also, such an API for safely handling torn values can be built on top of the proposed API,
+  so we can leave that to a (niche) ecosystem crate.
+
 # Unresolved questions

From 6fae14751e101556a0dac0f647ca879414bc0307 Mon Sep 17 00:00:00 2001
From: Mara Bos
Date: Wed, 3 Jul 2024 14:48:01 +0200
Subject: [PATCH 7/9] Add example.
---
 text/3301-atomic-memcpy.md | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/text/3301-atomic-memcpy.md b/text/3301-atomic-memcpy.md
index 4045f1b3a43..7bb3412c3c3 100644
--- a/text/3301-atomic-memcpy.md
+++ b/text/3301-atomic-memcpy.md
@@ -126,6 +126,33 @@ the data might be in, since it might be the result of tearing during a race.
 Only after confirming that there was no race and the data is valid
 can one safely use `MaybeUninit::assume_init` to get the actual `T` out.
 
+```rust
+pub struct SeqLock<T> {
+    seq: AtomicUsize,
+    data: AtomicPerByte<T>,
+}
+
+impl<T: Copy> SeqLock<T> {
+    /// Safety: Only call from one thread.
+    pub unsafe fn write(&self, value: T) {
+        self.seq.fetch_add(1, Relaxed);
+        self.data.store(value, Release);
+        self.seq.fetch_add(1, Release);
+    }
+
+    pub fn read(&self) -> T {
+        loop {
+            let s1 = self.seq.load(Acquire);
+            let data = self.data.load(Acquire);
+            let s2 = self.seq.load(Relaxed);
+            if s1 & 1 == 0 && s1 == s2 {
+                return unsafe { data.assume_init() };
+            }
+        }
+    }
+}
+```
+
 # Full API Overview

From af86156ab923fb59ef3859c14edc1c873615adfa Mon Sep 17 00:00:00 2001
From: Mara Bos
Date: Wed, 3 Jul 2024 15:06:25 +0200
Subject: [PATCH 8/9] Add note on uninitialized state.

---
 text/3301-atomic-memcpy.md | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/text/3301-atomic-memcpy.md b/text/3301-atomic-memcpy.md
index 7bb3412c3c3..6fdfd7cd347 100644
--- a/text/3301-atomic-memcpy.md
+++ b/text/3301-atomic-memcpy.md
@@ -257,6 +257,26 @@ The slice functions panic if the slices are not of the same length.
   Also, such an API for safely handling torn values can be built on top of the proposed API,
   so we can leave that to a (niche) ecosystem crate.
 
+- Don't allow an uninitialized state.
+
+  Even if we use `MaybeUninit<T>` to represent a 'potentially torn value',
+  we could still attempt to design an API where we do not allow an uninitialized state.
+
+  It might seem like that results in a much simpler API with `MaybeUninit<T>` replaced by `T` in
+  methods like `into_inner()` and `get_mut()`, but that is not the case:
+
+  As long as `store()` can be called concurrently by multiple threads,
+  it is not only the `load()` method that can result in a torn value,
+  since the `AtomicPerByte<T>` object itself might end up storing a torn value.
+
+  Therefore, even if we disallow uninitialized values,
+  every method will still have `MaybeUninit<T>` in its signature,
+  at which point we lose basically all benefits of removing the uninitialized state.
+
+  Removing the uninitialized state does result in a big downside for users who need to add that state back,
+  as the interface of an `AtomicPerByte<MaybeUninit<T>>` would result in a doubly wrapped `MaybeUninit<MaybeUninit<T>>` in many places,
+  which can be quite unergonomic and confusing.

From 520ab88c43e2f10ad56395d47859ee038ebda928 Mon Sep 17 00:00:00 2001
From: Mara Bos
Date: Wed, 3 Jul 2024 16:00:30 +0200
Subject: [PATCH 9/9] Update.
---
 text/3301-atomic-memcpy.md | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/text/3301-atomic-memcpy.md b/text/3301-atomic-memcpy.md
index 6fdfd7cd347..946a0f26666 100644
--- a/text/3301-atomic-memcpy.md
+++ b/text/3301-atomic-memcpy.md
@@ -171,14 +171,14 @@ impl<T> AtomicPerByte<T> {
     pub const fn new(value: T) -> Self;
     pub const fn uninit() -> Self;
 
-    pub fn load(&self, ordering: Ordering) -> MaybeUninit<T>;
     pub fn store(&self, value: T, ordering: Ordering);
+    pub fn load(&self, ordering: Ordering) -> MaybeUninit<T>;
 
-    pub fn load_to(&self, dest: &mut MaybeUninit<T>, ordering: Ordering);
     pub fn store_from(&self, src: &MaybeUninit<T>, ordering: Ordering);
+    pub fn load_to(&self, dest: &mut MaybeUninit<T>, ordering: Ordering);
 
-    pub fn load_to_slice(this: &[Self], dest: &mut [MaybeUninit<T>], ordering: Ordering);
     pub fn store_from_slice(this: &[Self], src: &[MaybeUninit<T>], ordering: Ordering);
+    pub fn load_to_slice(this: &[Self], dest: &mut [MaybeUninit<T>], ordering: Ordering);
 
     pub const fn into_inner(self) -> MaybeUninit<T>;
 
     pub const fn as_ptr(&self) -> *const T;
     pub const fn as_mut_ptr(&self) -> *mut T;
@@ -193,12 +193,11 @@ impl<T> AtomicPerByte<T> {
 }
 
 impl<T> Debug for AtomicPerByte<T>;
-impl<T> From<T> for AtomicPerByte<T>;
 impl<T> From<MaybeUninit<T>> for AtomicPerByte<T>;
 ```
 
 Note how the entire interface is safe.
-All potential unsafety is captured by using `MaybeUninit`.
+All potential unsafety is captured by the use of `MaybeUninit`.
 
 The load functions panic if the `ordering` is not `Relaxed` or `Acquire`.
 The store functions panic if the `ordering` is not `Relaxed` or `Release`.
@@ -212,7 +211,9 @@ The slice functions panic if the slices are not of the same length.
 - It's not immediately obvious this type behaves like a `MaybeUninit<T>`,
   making it easy to forget to manually drop any values that implement `Drop`.
 
-  This could be solved by requiring `T: Copy`, or by using a better name for this type.
+  This could be solved by requiring `T: Copy`, or by using a better name for this type. (See alternatives below.)
+
+  Very clear documentation might be enough, though.
 
 - `MaybeUninit` today isn't as ergonomic as it should be.