rust-lang · bors · Dec 14, 2020 · Oct 5, 2020 · Oct 5, 2020 · Oct 5, 2020
diff --git a/library/std/src/sys/windows/c.rs b/library/std/src/sys/windows/c.rs
@@ -31,6 +31,8 @@ pub type WORD = u16;
 pub type CHAR = c_char;
 pub type ULONG_PTR = usize;
 pub type ULONG = c_ulong;
+pub type NTSTATUS = LONG;
+pub type ACCESS_MASK = DWORD;
 
 pub type LPBOOL = *mut BOOL;
 pub type LPBYTE = *mut BYTE;
@@ -286,6 +288,8 @@ pub const STACK_SIZE_PARAM_IS_A_RESERVATION: DWORD = 0x00010000;
 
 pub const HEAP_ZERO_MEMORY: DWORD = 0x00000008;
 
+pub const STATUS_SUCCESS: NTSTATUS = 0x00000000;
+
 #[repr(C)]
 #[cfg(not(target_pointer_width = "64"))]
 pub struct WSADATA {
@@ -1096,3 +1100,46 @@ compat_fn! {
         panic!("rwlocks not available")
     }
 }
+compat_fn! {
+    "api-ms-win-core-synch-l1-2-0":
+    pub fn WaitOnAddress(
+        Address: LPVOID,
+        CompareAddress: LPVOID,
+        AddressSize: SIZE_T,
+        dwMilliseconds: DWORD
+    ) -> BOOL {
+        panic!("WaitOnAddress not available")
+    }
+    pub fn WakeByAddressSingle(Address: LPVOID) -> () {
+        // If this api is unavailable, there cannot be anything waiting, because
+        // WaitOnAddress would've panicked. So it's fine to do nothing here.
+    }
+}
+
+compat_fn! {
+    "ntdll":
+    pub fn NtCreateKeyedEvent(
+        KeyedEventHandle: LPHANDLE,
+        DesiredAccess: ACCESS_MASK,
+        ObjectAttributes: LPVOID,
+        Flags: ULONG
+    ) -> NTSTATUS {
+        panic!("keyed events not available")
+    }
+    pub fn NtReleaseKeyedEvent(
+        EventHandle: HANDLE,
+        Key: LPVOID,
+        Alertable: BOOLEAN,
+        Timeout: PLARGE_INTEGER
+    ) -> NTSTATUS {
+        panic!("keyed events not available")
+    }
+    pub fn NtWaitForKeyedEvent(
+        EventHandle: HANDLE,
+        Key: LPVOID,
+        Alertable: BOOLEAN,
+        Timeout: PLARGE_INTEGER
+    ) -> NTSTATUS {
+        panic!("keyed events not available")
+    }
+}
diff --git a/library/std/src/sys/windows/compat.rs b/library/std/src/sys/windows/compat.rs
@@ -34,6 +34,7 @@ macro_rules! compat_fn {
     )*) => ($(
         $(#[$meta])*
         pub mod $symbol {
+            #[allow(unused_imports)]
             use super::*;
             use crate::sync::atomic::{AtomicUsize, Ordering};
             use crate::mem;

diff --git a/library/std/src/sys/windows/mod.rs b/library/std/src/sys/windows/mod.rs
@@ -36,6 +36,7 @@ pub mod rwlock;
 pub mod thread;
 pub mod thread_local_dtor;
 pub mod thread_local_key;
+pub mod thread_parker;
 pub mod time;
 cfg_if::cfg_if! {
     if #[cfg(not(target_vendor = "uwp"))] {

diff --git a/library/std/src/sys/windows/thread_parker.rs b/library/std/src/sys/windows/thread_parker.rs
@@ -0,0 +1,250 @@
+// Thread parker implementation for Windows.
+//
+// This uses WaitOnAddress and WakeByAddressSingle if available (Windows 8+).
+// This modern API is exactly the same as the futex syscalls the Linux thread
+// parker uses. When These APIs are available, the implementation of this
+// thread parker matches the Linux thread parker exactly.
+//
+// However, when the modern API is not available, this implementation falls
+// back to NT Keyed Events, which are similar, but have some important
+// differences. These are available since Windows XP.
+//
+// WaitOnAddress first checks the state of the thread parker to make sure it no
+// WakeByAddressSingle calls can be missed between updating the parker state
+// and calling the function.
+//
+// NtWaitForKeyedEvent does not have this option, and unconditionally blocks
+// without checking the parker state first. Instead, NtReleaseKeyedEvent
+// (unlike WakeByAddressSingle) *blocks* until it woke up a thread waiting for
+// it by NtWaitForKeyedEvent. This way, we can be sure no events are missed,
+// but we need to be careful not to block unpark() if park_timeout() was woken
+// up by a timeout instead of unpark().
+//
+// Unlike WaitOnAddress, NtWaitForKeyedEvent/NtReleaseKeyedEvent operate on a
+// HANDLE (created with NtCreateKeyedEvent). This means that we can be sure
+// a succesfully awoken park() was awoken by unpark() and not a
+// NtReleaseKeyedEvent call from some other code, as these events are not only
+// matched by the key (address of the parker (state)), but also by this HANDLE.
+// We lazily allocate this handle the first time it is needed.
+//
+// The fast path (calling park() after unpark() was already called) and the
+// possible states are the same for both implementations. This is used here to
+// make sure the fast path does not even check which API to use, but can return
+// right away, independent of the used API. Only the slow paths (which will
+// actually block/wake a thread) check which API is available and have
+// different implementations.
+//
+// Unfortunately, NT Keyed Events are an undocumented Windows API. However:
+// - This API is relatively simple with obvious behaviour, and there are
+//   several (unofficial) articles documenting the details. [1]
+// - `parking_lot` has been using this API for years (on Windows versions
+//   before Windows 8). [2] Many big projects extensively use parking_lot,
+//   such as servo and the Rust compiler itself.
+// - It is the underlying API used by Windows SRW locks and Windows critical
+//   sections. [3] [4]
+// - The source code of the implementations of Wine, ReactOs, and Windows XP
+//   are available and match the expected behaviour.
+// - The main risk with an undocumented API is that it might change in the
+//   future. But since we only use it for older versions of Windows, that's not
+//   a problem.
+// - Even if these functions do not block or wake as we expect (which is
+//   unlikely, see all previous points), this implementation would still be
+//   memory safe. The NT Keyed Events API is only used to sleep/block in the
+//   right place.
+//
+// [1]: http://www.locklessinc.com/articles/keyed_events/
+// [2]: https://github.com/Amanieu/parking_lot/commit/43abbc964e
+// [3]: https://docs.microsoft.com/en-us/archive/msdn-magazine/2012/november/windows-with-c-the-evolution-of-synchronization-in-windows-and-c
+// [4]: Windows Internals, Part 1, ISBN 9780735671300
+
+use crate::convert::TryFrom;
+use crate::ptr;
+use crate::sync::atomic::{
+    AtomicI8, AtomicUsize,
+    Ordering::{Acquire, Relaxed, Release},
+};
+use crate::sys::{c, dur2timeout};
+use crate::time::Duration;
+
+pub struct Parker {
+    state: AtomicI8,
+}
+
+const PARKED: i8 = -1;
+const EMPTY: i8 = 0;
+const NOTIFIED: i8 = 1;
+
+// Notes about memory ordering:
+//
+// Memory ordering is only relevant for the relative ordering of operations
+// between different variables. Even Ordering::Relaxed guarantees a
+// monotonic/consistent order when looking at just a single atomic variable.
+//
+// So, since this parker is just a single atomic variable, we only need to look
+// at the ordering guarantees we need to provide to the 'outside world'.
+//
+// The only memory ordering guarantee that parking and unparking provide, is
+// that things which happened before unpark() are visible on the thread
+// returning from park() afterwards. Otherwise, it was effectively unparked
+// before unpark() was called while still consuming the 'token'.
+//
+// In other words, unpark() needs to synchronize with the part of park() that
+// consumes the token and returns.
+//
+// This is done with a release-acquire synchronization, by using
+// Ordering::Release when writing NOTIFIED (the 'token') in unpark(), and using
+// Ordering::Acquire when reading this state in park() after waking up.
+impl Parker {
+    pub fn new() -> Self {
+        Self { state: AtomicI8::new(EMPTY) }
+    }
+
+    // Assumes this is only called by the thread that owns the Parker,
+    // which means that `self.state != PARKED`.
+    pub unsafe fn park(&self) {
+        // Change NOTIFIED=>EMPTY or EMPTY=>PARKED, and directly return in the
+        // first case.
+        if self.state.fetch_sub(1, Acquire) == NOTIFIED {
+            return;
+        }
+
+        if c::WaitOnAddress::is_available() {
+            loop {
+                // Wait for something to happen, assuming it's still set to PARKED.
+                c::WaitOnAddress(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, c::INFINITE);
+                // Change NOTIFIED=>EMPTY but leave PARKED alone.
+                if self.state.compare_and_swap(NOTIFIED, EMPTY, Acquire) == NOTIFIED {
+                    // Actually woken up by unpark().
+                    return;
+                } else {
+                    // Spurious wake up. We loop to try again.
+                }
+            }
+        } else {
+            // Wait for unpark() to produce this event.
+            c::NtWaitForKeyedEvent(keyed_event_handle(), self.ptr(), 0, ptr::null_mut());
+            // Set the state back to EMPTY (from either PARKED or NOTIFIED).
+            // Note that we don't just write EMPTY, but use swap() to also
+            // include an acquire-ordered read to synchronize with unpark()'s
+            // release-ordered write.
+            self.state.swap(EMPTY, Acquire);
+        }
+    }
+
+    // Assumes this is only called by the thread that owns the Parker,
+    // which means that `self.state != PARKED`.
+    pub unsafe fn park_timeout(&self, timeout: Duration) {
+        // Change NOTIFIED=>EMPTY or EMPTY=>PARKED, and directly return in the
+        // first case.
+        if self.state.fetch_sub(1, Acquire) == NOTIFIED {
+            return;
+        }
+
+        if c::WaitOnAddress::is_available() {
+            // Wait for something to happen, assuming it's still set to PARKED.
+            c::WaitOnAddress(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, dur2timeout(timeout));
+            // Set the state back to EMPTY (from either PARKED or NOTIFIED).
+            // Note that we don't just write EMPTY, but use swap() to also
+            // include an acquire-ordered read to synchronize with unpark()'s
+            // release-ordered write.
+            if self.state.swap(EMPTY, Acquire) == NOTIFIED {
+                // Actually woken up by unpark().
+            } else {
+                // Timeout or spurious wake up.
+                // We return either way, because we can't easily tell if it was the
+                // timeout or not.
+            }
+        } else {
+            // Need to wait for unpark() using NtWaitForKeyedEvent.
+            let handle = keyed_event_handle();
+
+            // NtWaitForKeyedEvent uses a unit of 100ns, and uses negative
+            // values to indicate a relative time on the monotonic clock.
+            // This is documented here for the underlying KeWaitForSingleObject function:
+            // https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/nf-wdm-kewaitforsingleobject
+            let mut timeout = match i64::try_from((timeout.as_nanos() + 99) / 100) {
+                Ok(t) => -t,
+                Err(_) => i64::MIN,
+            };
+
+            // Wait for unpark() to produce this event.
+            let unparked =
+                c::NtWaitForKeyedEvent(handle, self.ptr(), 0, &mut timeout) == c::STATUS_SUCCESS;
+
+            // Set the state back to EMPTY (from either PARKED or NOTIFIED).
+            let prev_state = self.state.swap(EMPTY, Acquire);
+
+            if !unparked && prev_state == NOTIFIED {
+                // We were awoken by a timeout, not by unpark(), but the state
+                // was set to NOTIFIED, which means we *just* missed an
+                // unpark(), which is now blocked on us to wait for it.
+                // Wait for it to consume the event and unblock that thread.
+                c::NtWaitForKeyedEvent(handle, self.ptr(), 0, ptr::null_mut());
+            }
+        }
+    }
+
+    pub fn unpark(&self) {
+        // Change PARKED=>NOTIFIED, EMPTY=>NOTIFIED, or NOTIFIED=>NOTIFIED, and
+        // wake the thread in the first case.
+        //
+        // Note that even NOTIFIED=>NOTIFIED results in a write. This is on
+        // purpose, to make sure every unpark() has a release-acquire ordering
+        // with park().
+        if self.state.swap(NOTIFIED, Release) == PARKED {
+            if c::WakeByAddressSingle::is_available() {
+                unsafe {
+                    c::WakeByAddressSingle(self.ptr());
+                }
+            } else {
+                // If we run NtReleaseKeyedEvent before the waiting thread runs
+                // NtWaitForKeyedEvent, this (shortly) blocks until we can wake it up.
+                // If the waiting thread wakes up before we run NtReleaseKeyedEvent
+                // (e.g. due to a timeout), this blocks until we do wake up a thread.
+                // To prevent this thread from blocking indefinitely in that case,
+                // park_impl() will, after seeing the state set to NOTIFIED after
+                // waking up, call NtWaitForKeyedEvent again to unblock us.
+                unsafe {
+                    c::NtReleaseKeyedEvent(keyed_event_handle(), self.ptr(), 0, ptr::null_mut());
+                }
+            }
+        }
+    }
+
+    fn ptr(&self) -> c::LPVOID {
+        &self.state as *const _ as c::LPVOID
+    }
+}
+
+fn keyed_event_handle() -> c::HANDLE {
+    const INVALID: usize = !0;
+    static HANDLE: AtomicUsize = AtomicUsize::new(INVALID);
+    match HANDLE.load(Relaxed) {
+        INVALID => {
+            let mut handle = c::INVALID_HANDLE_VALUE;
+            unsafe {
+                match c::NtCreateKeyedEvent(
+                    &mut handle,
+                    c::GENERIC_READ | c::GENERIC_WRITE,
+                    ptr::null_mut(),
+                    0,
+                ) {
+                    c::STATUS_SUCCESS => {}
+                    r => panic!("Unable to create keyed event handle: error {}", r),
+                }
+            }
+            match HANDLE.compare_exchange(INVALID, handle as usize, Relaxed, Relaxed) {
+                Ok(_) => handle,
+                Err(h) => {
+                    // Lost the race to another thread initializing HANDLE before we did.
+                    // Closing our handle and using theirs instead.
+                    unsafe {
+                        c::CloseHandle(handle);
+                    }
+                    h as c::HANDLE
+                }
+            }
+        }
+        handle => handle as c::HANDLE,
+    }
+}
diff --git a/library/std/src/sys_common/thread_parker/mod.rs b/library/std/src/sys_common/thread_parker/mod.rs
@@ -2,6 +2,8 @@ cfg_if::cfg_if! {
     if #[cfg(any(target_os = "linux", target_os = "android"))] {
         mod futex;
         pub use futex::Parker;
+    } else if #[cfg(windows)] {
+        pub use crate::sys::thread_parker::Parker;
     } else {
         mod generic;
         pub use generic::Parker;