alexheretic · alexheretic · Feb 20, 2022 · Feb 18, 2022 · Feb 18, 2022 · Feb 18, 2022
diff --git a/experiments/Cargo.toml b/experiments/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "experiments"
+version = "0.1.0"
+edition = "2021"
+publish = false
+
+[dependencies]
+spin_sleep = { path = ".." }
+num_cpus = "1.13.1"
+rand = "0.8.5"
diff --git a/experiments/README.md b/experiments/README.md
@@ -0,0 +1,107 @@
+# spin_sleep experiments
+Experiments to measure latency. Results are machine specific & non-deterministic, but are used to
+determine good default settings for _spin_sleep_.
+
+## native_sleep_accuracy
+Call OS native sleep for **1ns** and see how long it actually takes.
+
+```sh
+cargo run --bin native_sleep_accuracy --release
+```
+
+**Linux example output** *
+```
+average: 53.04µs, best : 7.95µs, worst: 85.238µs
+```
+
+**Windows example output** *
+```
+average: 2.012432ms, best : 2.0069ms, worst: 2.1455ms
+```
+
+## spin_strategy_latency
+Measure `SpinStrategy` latencies and spin counts across various wait durations
+_5ms, 900µs, 5µs, 100ns_.
+
+```sh
+cargo run --bin spin_strategy_latency --release
+```
+
+**Linux example output** *
+```
+warming up...
+5ms    None          avg-spins: 191610   avg-actual: 5.000044ms
+5ms    SpinLoopHint  avg-spins: 176594   avg-actual: 5.000045ms
+5ms    YieldThread   avg-spins: 38366    avg-actual: 5.000105ms
+900µs  None          avg-spins: 34340    avg-actual: 900.05µs
+900µs  SpinLoopHint  avg-spins: 31633    avg-actual: 900.052µs
+900µs  YieldThread   avg-spins: 6843     avg-actual: 900.104µs
+5µs    None          avg-spins: 186      avg-actual: 5.04µs
+5µs    SpinLoopHint  avg-spins: 173      avg-actual: 5.048µs
+5µs    YieldThread   avg-spins: 38       avg-actual: 5.075µs
+100ns  None          avg-spins: 3        avg-actual: 135ns
+100ns  SpinLoopHint  avg-spins: 3        avg-actual: 132ns
+100ns  YieldThread   avg-spins: 1        avg-actual: 181ns
+```
+
+**Windows example output** *
+```
+warming up...
+5ms    None          avg-spins: 158591   avg-actual: 5ms
+5ms    SpinLoopHint  avg-spins: 134568   avg-actual: 5ms
+5ms    YieldThread   avg-spins: 50380    avg-actual: 5.000039ms
+900µs  None          avg-spins: 28491    avg-actual: 900µs
+900µs  SpinLoopHint  avg-spins: 24128    avg-actual: 900.002µs
+900µs  YieldThread   avg-spins: 9070     avg-actual: 900.033µs
+5µs    None          avg-spins: 155      avg-actual: 5µs
+5µs    SpinLoopHint  avg-spins: 133      avg-actual: 5µs
+5µs    YieldThread   avg-spins: 49       avg-actual: 5.042µs
+100ns  None          avg-spins: 0        avg-actual: 100ns
+100ns  SpinLoopHint  avg-spins: 0        avg-actual: 100ns
+100ns  YieldThread   avg-spins: 1        avg-actual: 102ns
+```
+
+## spin_strategy_latency under load
+Do the same measurement as above but while all cores are being stressed.
+
+```sh
+cargo run --bin spin_strategy_latency --release -- load
+```
+
+**Linux example output** *
+```
+Simulating 16 thread load
+warming up...
+5ms    None          avg-spins: 159018   avg-actual: 5.000058ms
+5ms    SpinLoopHint  avg-spins: 122263   avg-actual: 5.000065ms
+5ms    YieldThread   avg-spins: 23265    avg-actual: 5.000327ms
+900µs  None          avg-spins: 27748    avg-actual: 938.427µs
+900µs  SpinLoopHint  avg-spins: 21727    avg-actual: 900.062µs
+900µs  YieldThread   avg-spins: 4054     avg-actual: 901.31µs
+5µs    None          avg-spins: 157      avg-actual: 5.055µs
+5µs    SpinLoopHint  avg-spins: 122      avg-actual: 5.057µs
+5µs    YieldThread   avg-spins: 23       avg-actual: 5.07µs
+100ns  None          avg-spins: 2        avg-actual: 147ns
+100ns  SpinLoopHint  avg-spins: 1        avg-actual: 135ns
+100ns  YieldThread   avg-spins: 1        avg-actual: 278ns
+```
+
+**Windows example output** *
+```
+Simulating 16 thread load
+warming up...
+5ms    None          avg-spins: 105568   avg-actual: 5.838449ms
+5ms    SpinLoopHint  avg-spins: 79548    avg-actual: 5.608363ms
+5ms    YieldThread   avg-spins: 1        avg-actual: 17.526351ms
+900µs  None          avg-spins: 19461    avg-actual: 1.127537ms
+900µs  SpinLoopHint  avg-spins: 14578    avg-actual: 1.326708ms
+900µs  YieldThread   avg-spins: 1        avg-actual: 17.526448ms
+5µs    None          avg-spins: 108      avg-actual: 5µs
+5µs    SpinLoopHint  avg-spins: 79       avg-actual: 6.298µs
+5µs    YieldThread   avg-spins: 1        avg-actual: 11.417271ms
+100ns  None          avg-spins: 1        avg-actual: 101ns
+100ns  SpinLoopHint  avg-spins: 0        avg-actual: 102ns
+100ns  YieldThread   avg-spins: 0        avg-actual: 7.716038ms
+```
+
+\* _Measured 2022-02-18 with an AMD 5800X_.
diff --git a/experiments/src/bin/native_sleep_accuracy.rs b/experiments/src/bin/native_sleep_accuracy.rs
@@ -0,0 +1,32 @@
+use std::time::{Duration, Instant};
+
+fn main() { // Times 100 native sleeps of 1ns each and prints average, best and worst.
+    if cfg!(debug_assertions) { // refuse to run when built with debug assertions
+        eprintln!("Should run with `--release`");
+        std::process::exit(1);
+    }
+
+    let mut best = Duration::from_secs(100); // starts high so the first sample replaces it
+    let mut sum = Duration::from_secs(0); // running total of all samples
+    let mut worst = Duration::from_secs(0); // starts at zero so the first sample replaces it
+
+    for _ in 0..100 { // 100 samples; the divisor in the average below must match
+        let before = Instant::now();
+        spin_sleep::native_sleep(Duration::new(0, 1)); // request a 1ns native sleep
+        let elapsed = before.elapsed(); // how long the sleep call actually took
+        sum += elapsed;
+        if elapsed < best {
+            best = elapsed;
+        }
+        if elapsed > worst {
+            worst = elapsed;
+        }
+    }
+
+    println!( // average is sum / 100 in whole nanoseconds; unwrap panics if it overflows u64
+        "average: {:?}, best : {:?}, worst: {:?}",
+        Duration::from_nanos(u64::try_from(sum.as_nanos() / 100).unwrap()),
+        best,
+        worst,
+    );
+}
diff --git a/experiments/src/bin/spin_strategy_latency.rs b/experiments/src/bin/spin_strategy_latency.rs
@@ -0,0 +1,71 @@
+use std::time::{Duration, Instant};
+
+fn main() { // Benchmarks each spin strategy over several wait durations; prints averages.
+    if cfg!(debug_assertions) { // refuse to run when built with debug assertions
+        eprintln!("Should run with `--release`");
+        std::process::exit(1);
+    }
+
+    if std::env::args().nth(1).as_deref() == Some("load") { // optional first CLI argument
+        let cpus = num_cpus::get(); // one busy thread per CPU reported by num_cpus
+        eprintln!("Simulating {cpus} thread load");
+        for _ in 0..cpus {
+            std::thread::spawn(|| { // handle is dropped: the thread is never joined
+                use rand::Rng;
+                let mut rng = rand::thread_rng();
+                while rng.gen::<u64>() > 0 {} // busy loop; ends only if the RNG yields 0
+            });
+        }
+
+        std::thread::sleep(Duration::from_secs(1)); // wait 1s after spawning, before measuring
+    }
+
+    // warmup: busy-wait 200 times for 5ms each before taking any measurement
+    eprintln!("warming up...");
+    for _ in 0..200 {
+        let before = Instant::now();
+        while before.elapsed() < Duration::from_millis(5) {}
+    }
+
+    for duration in [ // target wait durations, longest first
+        Duration::from_millis(5),
+        Duration::from_micros(900),
+        Duration::from_micros(5),
+        Duration::from_nanos(100),
+    ] {
+        for strategy in [ // every strategy is measured for every duration
+            SpinStrategy::None,
+            SpinStrategy::SpinLoopHint,
+            SpinStrategy::YieldThread,
+        ] {
+            let mut sum = Duration::from_secs(0); // total measured time over all runs
+            let mut spins = 0_u32; // total loop iterations over all runs
+
+            for _ in 0..100 { // 100 runs per pair; the `/ 100` divisors below must match
+                let before = Instant::now();
+                while before.elapsed() < duration {
+                    match strategy {
+                        SpinStrategy::YieldThread => std::thread::yield_now(),
+                        SpinStrategy::SpinLoopHint => std::hint::spin_loop(),
+                        SpinStrategy::None => {}
+                    }
+                    spins += 1;
+                }
+                sum += before.elapsed(); // actual time, including any overshoot past `duration`
+            }
+            println!( // one row per (duration, strategy): integer-average spins and mean time
+                "{duration: <6?} {: <13} avg-spins: {:<8} avg-actual: {:?}",
+                format!("{strategy:?}"), // NOTE(review): presumably so `<13` pads it; confirm
+                spins / 100,
+                Duration::from_nanos(u64::try_from(sum.as_nanos() / 100).unwrap()),
+            );
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy)]
+enum SpinStrategy { // what the benchmark's spin loop does on each iteration
+    None, // nothing: the match arm is empty
+    YieldThread, // calls std::thread::yield_now()
+    SpinLoopHint, // calls std::hint::spin_loop()
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -71,9 +71,13 @@ pub struct SpinSleeper {
 #[cfg(not(windows))]
 const DEFAULT_NATIVE_SLEEP_ACCURACY: SubsecondNanoseconds = 125_000;
 
+/// Asks the OS to put the current thread to sleep for at least the specified amount of time.
+///
+/// **Windows**: Automatically selects the best native sleep accuracy generally achieving ~1ms
+/// native sleep accuracy, instead of default ~16ms.
 #[cfg(not(windows))]
 #[inline]
-pub(crate) fn thread_sleep(duration: Duration) {
+pub fn native_sleep(duration: Duration) {
     thread::sleep(duration)
 }
 
@@ -96,9 +100,13 @@ static MIN_TIME_PERIOD: once_cell::sync::Lazy<winapi::shared::minwindef::UINT> =
         }
     });
 
+/// Asks the OS to put the current thread to sleep for at least the specified amount of time.
+///
+/// **Windows**: Automatically selects the best native sleep accuracy generally achieving ~1ms
+/// native sleep accuracy, instead of default ~16ms.
 #[cfg(windows)]
 #[inline]
-pub(crate) fn thread_sleep(duration: Duration) {
+pub fn native_sleep(duration: Duration) {
     unsafe {
         use winapi::um::timeapi::{timeBeginPeriod, timeEndPeriod};
         timeBeginPeriod(*MIN_TIME_PERIOD);
@@ -140,14 +148,27 @@ impl SpinSleeper {
     /// **Windows**: Automatically selects the best native sleep accuracy generally achieving ~1ms
     /// native sleep accuracy, instead of default ~16ms.
     pub fn sleep(self, duration: Duration) {
+        self.sleep_with(SpinStrategy::default(), duration)
+    }
+
+    /// Natively sleeps for `duration` minus the native accuracy when `duration` exceeds it, then
+    /// spins with the given [`SpinStrategy`] until `duration` has elapsed since the call began.
+    ///
+    /// **Windows**: Automatically selects the best native sleep accuracy generally achieving ~1ms
+    /// native sleep accuracy, instead of default ~16ms.
+    #[inline]
+    pub fn sleep_with(self, strategy: SpinStrategy, duration: Duration) {
         let start = Instant::now();
         let accuracy = Duration::new(0, self.native_accuracy_ns);
         if duration > accuracy {
-            thread_sleep(duration - accuracy);
+            native_sleep(duration - accuracy);
         }
         // spin the rest of the duration
         while start.elapsed() < duration {
-            thread::yield_now();
+            match strategy {
+                SpinStrategy::YieldThread => thread::yield_now(),
+                SpinStrategy::SpinLoopHint => std::hint::spin_loop(),
+            }
         }
     }
 
@@ -188,6 +209,31 @@ pub fn sleep(duration: Duration) {
     SpinSleeper::default().sleep(duration);
 }
 
+#[derive(Debug, Clone, Copy)]
+#[non_exhaustive]
+pub enum SpinStrategy {
+    /// Call [`std::thread::yield_now`] on each spin iteration (default on non-Windows).
+    YieldThread,
+    /// Call [`std::hint::spin_loop`] on each spin iteration (default on Windows).
+    SpinLoopHint,
+}
+
+/// The default strategy is selected per target OS at compile time:
+/// * Windows: [`SpinStrategy::SpinLoopHint`]
+/// * all other targets: [`SpinStrategy::YieldThread`]
+impl Default for SpinStrategy {
+    #[inline]
+    #[cfg(windows)]
+    fn default() -> Self {
+        Self::SpinLoopHint
+    }
+    #[inline]
+    #[cfg(not(windows))]
+    fn default() -> Self {
+        Self::YieldThread
+    }
+}
+
 // Not run unless specifically enabled with `cargo test --features "nondeterministic_tests"`
 // Travis does not do well with these tests, as they require a certain CPU priority.
 #[cfg(feature = "nondeterministic_tests")]
@@ -312,33 +358,3 @@ mod spin_sleep_test {
         });
     }
 }
-
-#[test]
-#[ignore]
-fn print_estimated_thread_sleep_accuracy() {
-    let mut best = Duration::from_secs(100);
-    let mut sum = Duration::from_secs(0);
-    let mut worst = Duration::from_secs(0);
-
-    for _ in 0..100 {
-        let before = Instant::now();
-        thread_sleep(Duration::new(0, 1));
-        let elapsed = before.elapsed();
-        sum += elapsed;
-        if elapsed < best {
-            best = elapsed;
-        }
-        if elapsed > worst {
-            worst = elapsed;
-        }
-    }
-
-    println!(
-        "average: {:?}, best : {:?}, worst: {:?}",
-        Duration::from_nanos((sum.subsec_nanos() / 100).into()),
-        best,
-        worst,
-    );
-
-    panic!("Manual use only, ignore when done");
-}
diff --git a/src/loop_helper.rs b/src/loop_helper.rs
@@ -99,7 +99,7 @@ impl LoopHelperBuilder {
         LoopHelper {
             target_delta: Duration::from_secs_f64(1.0 / target_rate.into()),
             report_interval: interval,
-            sleeper: self.sleeper.unwrap_or_else(SpinSleeper::default),
+            sleeper: self.sleeper.unwrap_or_default(),
             last_report: now,
             last_loop_start: now,
             delta_sum: Duration::from_secs(0),
@@ -154,7 +154,7 @@ impl LoopHelper {
     pub fn loop_sleep_no_spin(&mut self) {
         let elapsed = self.last_loop_start.elapsed();
         if elapsed < self.target_delta {
-            thread_sleep(self.target_delta - elapsed);
+            native_sleep(self.target_delta - elapsed);
         }
     }