From 0ba37ac9089d66a281172fbcccbb06ae0de10ed6 Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@buoyant.io>
Date: Sat, 4 Jun 2022 11:05:28 -0700
Subject: [PATCH] feat(util): add cache padding inhibitor (#192)

@jamesmunns' PR #161 added a feature for disabling cache padding in
`cordyceps`. however, `mycelium-util` and `maitake` also cache pad some
values. this branch adds similar feature flags to those crates.

in `mycelium-util`, the flag controls the public `CachePadded` type,
which is used both internally and in `maitake`. in `maitake`, the
feature just forwards to both `mycelium-util` and `cordyceps`' feature
flags.

Signed-off-by: Eliza Weisman <eliza@buoyant.io>
---
 maitake/Cargo.toml         |   3 +-
 maitake/README.md          |   2 +
 util/Cargo.toml            |   3 ++
 util/README.md             |  19 +++++++
 util/src/lib.rs            |   3 +-
 util/src/sync.rs           |  49 +----------------
 util/src/sync/cache_pad.rs | 108 +++++++++++++++++++++++++++++++++++++
 7 files changed, 137 insertions(+), 50 deletions(-)
 create mode 100644 util/README.md
 create mode 100644 util/src/sync/cache_pad.rs

diff --git a/maitake/Cargo.toml b/maitake/Cargo.toml
index 5ca8387f..bca6acfa 100644
--- a/maitake/Cargo.toml
+++ b/maitake/Cargo.toml
@@ -27,7 +27,8 @@ links = "maitake"
 
 [features]
 default = ["alloc"]
-alloc = []
+alloc = ["cordyceps/alloc"]
+no-cache-pad = ["mycelium-util/no-cache-pad", "cordyceps/no-cache-pad"]
 
 [dependencies]
 mycelium-bitfield = { path = "../bitfield" }
diff --git a/maitake/README.md b/maitake/README.md
index 77fa33e7..a01607a9 100644
--- a/maitake/README.md
+++ b/maitake/README.md
@@ -59,5 +59,7 @@ The following features are available (this list is incomplete; you can help by [
 | Feature | Default | Explanation |
 | :---    | :---    | :---        |
 | `alloc` | `true`  | Enables [`liballoc`] dependency |
+| `no-cache-pad` | `false` | Inhibits cache padding for the [`CachePadded`] struct. When this feature is NOT enabled, the size will be determined based on target platform. |
 
 [`liballoc`]: https://doc.rust-lang.org/alloc/
+[`CachePadded`]: https://mycelium.elizas.website/mycelium_util/sync/struct.CachePadded.html
\ No newline at end of file
diff --git a/util/Cargo.toml b/util/Cargo.toml
index f37ac3bc..2f8438e1 100644
--- a/util/Cargo.toml
+++ b/util/Cargo.toml
@@ -4,11 +4,14 @@ version = "0.1.0"
 authors = ["Eliza Weisman <eliza@elizas.website>"]
 edition = "2018"
 rust-version = "1.61.0"
+readme = "README.md"
 
 # See more keys and their definitions at
 # https://doc.rust-lang.org/cargo/reference/manifest.html
 [features]
+default = []
 alloc = ["cordyceps/alloc"]
+no-cache-pad = ["cordyceps/no-cache-pad"]
 
 [dependencies]
 tracing = { git = "https://github.com/tokio-rs/tracing", default_features = false, features = ["attributes"] }
diff --git a/util/README.md b/util/README.md
new file mode 100644
index 00000000..58632e89
--- /dev/null
+++ b/util/README.md
@@ -0,0 +1,19 @@
+# mycelium-util
+
+a "standard library" for programming in the [mycelium] kernel and related
+libraries.
+
+## features
+
+The following features are available (this list is incomplete; you can help by [expanding it].)
+
+[expanding it]: https://github.com/hawkw/mycelium/edit/main/util/README.md
+
+| Feature | Default | Explanation |
+| :---    | :---    | :---        |
+| `no-cache-pad` | `false` | Inhibits cache padding for the [`CachePadded`] struct. When this feature is NOT enabled, the size will be determined based on target platform. |
+| `alloc`        | `false`  | Enables [`liballoc`] dependency |
+
+[mycelium]: https://mycelium.elizas.website
+[`CachePadded`]: https://mycelium.elizas.website/mycelium_util/sync/struct.CachePadded.html
+[`liballoc`]: https://doc.rust-lang.org/alloc/
\ No newline at end of file
diff --git a/util/src/lib.rs b/util/src/lib.rs
index d37fedce..35b9f368 100644
--- a/util/src/lib.rs
+++ b/util/src/lib.rs
@@ -1,5 +1,4 @@
-//! A "standard library" for programming in the Mycelium kernel and related
-//! libraries.
+#![cfg_attr(docsrs, doc = include_str!("../README.md"))]
 #![cfg_attr(target_os = "none", no_std)]
 #![allow(unused_unsafe)]
 #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg, doc_cfg_hide))]
diff --git a/util/src/sync.rs b/util/src/sync.rs
index 32dd3d71..84df5466 100644
--- a/util/src/sync.rs
+++ b/util/src/sync.rs
@@ -10,11 +10,8 @@ pub mod once;
 pub mod spin;
 pub use self::once::{InitOnce, Lazy};
 
-use core::{
-    fmt,
-    ops::{Deref, DerefMut},
-};
-
+mod cache_pad;
+pub use self::cache_pad::CachePadded;
 pub mod hint {
     #[cfg(not(loom))]
     pub use core::hint::spin_loop;
@@ -30,14 +27,6 @@ pub(crate) struct Backoff {
     max: u8,
 }
 
-#[cfg_attr(any(target_arch = "x86_64", target_arch = "aarch64"), repr(align(128)))]
-#[cfg_attr(
-    not(any(target_arch = "x86_64", target_arch = "aarch64")),
-    repr(align(64))
-)]
-#[derive(Clone, Copy, Default, Hash, PartialEq, Eq)]
-pub struct CachePadded<T>(T);
-
 // === impl Backoff ===
 
 impl Backoff {
@@ -76,37 +65,3 @@ impl Default for Backoff {
         Self::new()
     }
 }
-
-// === impl CachePadded ===
-
-impl<T> CachePadded<T> {
-    pub const fn new(value: T) -> Self {
-        Self(value)
-    }
-
-    pub fn into_inner(self) -> T {
-        self.0
-    }
-}
-
-impl<T> Deref for CachePadded<T> {
-    type Target = T;
-
-    #[inline]
-    fn deref(&self) -> &T {
-        &self.0
-    }
-}
-
-impl<T> DerefMut for CachePadded<T> {
-    #[inline]
-    fn deref_mut(&mut self) -> &mut T {
-        &mut self.0
-    }
-}
-
-impl<T: fmt::Debug> fmt::Debug for CachePadded<T> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        self.0.fmt(f)
-    }
-}
diff --git a/util/src/sync/cache_pad.rs b/util/src/sync/cache_pad.rs
new file mode 100644
index 00000000..bde21b6e
--- /dev/null
+++ b/util/src/sync/cache_pad.rs
@@ -0,0 +1,108 @@
+use core::{
+    fmt,
+    ops::{Deref, DerefMut},
+};
+
+pub use self::inner::CachePadded;
+
+/// When configured not to pad to cache alignment, just provide a no-op wrapper struct.
+/// This feature is useful for platforms with no data cache, such as many Cortex-M
+/// targets.
+#[cfg(feature = "no-cache-pad")]
+mod inner {
+    /// Aligns the wrapped value to the size of a cache line.
+    ///
+    /// This is used to avoid [false sharing] for values that may be
+    /// accessed concurrently.
+    ///
+    /// # Size/Alignment
+    ///
+    /// The size and alignment of this type depends on the target architecture,
+    /// and on whether or not the `no-cache-pad` feature flag is enabled.
+    ///
+    /// When the `no-cache-pad` crate feature flag is enabled, this is simply a
+    /// no-op wrapper struct. This is intended for use on platforms
+    /// with no data cache, such as many Cortex-M targets.
+    ///
+    /// In other cases, this type is always aligned to the size of a cache line,
+    /// based on the target architecture. On `x86_64`/`aarch64`, a cache line is
+    /// 128 bytes. On all other targets, a cache line is assumed to be 64 bytes
+    /// long. This type's size will always be a multiple of the cache line size;
+    /// if the wrapped type is longer than the alignment of a cache line, then
+    /// this type will be padded to multiple cache lines.
+    ///
+    /// [false sharing]: https://en.wikipedia.org/wiki/False_sharing
+    #[derive(Clone, Copy, Default, Hash, PartialEq, Eq)]
+    pub struct CachePadded<T>(pub(super) T);
+}
+
+/// When not inhibited, determine cache alignment based on target architecture.
+/// Align to 128 bytes on 64-bit x86/ARM targets, otherwise align to 64 bytes.
+#[cfg(not(feature = "no-cache-pad"))]
+mod inner {
+    /// Aligns the wrapped value to the size of a cache line.
+    ///
+    /// This is used to avoid [false sharing] for values that may be
+    /// accessed concurrently.
+    ///
+    /// # Size/Alignment
+    ///
+    /// The size and alignment of this type depends on the target architecture,
+    /// and on whether or not the `no-cache-pad` feature flag is enabled.
+    ///
+    /// When the `no-cache-pad` crate feature flag is enabled, this is simply a
+    /// no-op wrapper struct. This is intended for use on platforms
+    /// with no data cache, such as many Cortex-M targets.
+    ///
+    /// In other cases, this type is always aligned to the size of a cache line,
+    /// based on the target architecture. On `x86_64`/`aarch64`, a cache line is
+    /// 128 bytes. On all other targets, a cache line is assumed to be 64 bytes
+    /// long. This type's size will always be a multiple of the cache line size;
+    /// if the wrapped type is longer than the alignment of a cache line, then
+    /// this type will be padded to multiple cache lines.
+    ///
+    /// [false sharing]: https://en.wikipedia.org/wiki/False_sharing
+    #[cfg_attr(any(target_arch = "x86_64", target_arch = "aarch64"), repr(align(128)))]
+    #[cfg_attr(
+        not(any(target_arch = "x86_64", target_arch = "aarch64")),
+        repr(align(64))
+    )]
+    #[derive(Clone, Copy, Default, Hash, PartialEq, Eq)]
+    pub struct CachePadded<T>(pub(super) T);
+}
+
+// === impl CachePadded ===
+
+impl<T> CachePadded<T> {
+    /// Pads `value` to the length of a cache line.
+    pub const fn new(value: T) -> Self {
+        Self(value)
+    }
+
+    /// Unwraps the inner value and returns it.
+    pub fn into_inner(self) -> T {
+        self.0
+    }
+}
+
+impl<T> Deref for CachePadded<T> {
+    type Target = T;
+
+    #[inline]
+    fn deref(&self) -> &T {
+        &self.0
+    }
+}
+
+impl<T> DerefMut for CachePadded<T> {
+    #[inline]
+    fn deref_mut(&mut self) -> &mut T {
+        &mut self.0
+    }
+}
+
+impl<T: fmt::Debug> fmt::Debug for CachePadded<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.0.fmt(f)
+    }
+}