Skip to content

Commit

Permalink
Add a x86_64::cmpxchg16b intrinsic
Browse files Browse the repository at this point in the history
This intrinsic isn't actually specified by Intel, but it's something
gated with CPUID and can otherwise be a useful thing to have when
building primitives!

There exists an `AtomicU128` type in the standard library but it's only
exposed currently (and it's unstable) when a platform fully supports
128-bit atomics. The x86_64 architecture does not support it *unless*
the `cmpxchg16b` instruction is available, and it isn't always available!

This commit is also a proposal for how we can include support for
128-bit atomics in the standard library on relevant platforms. I'm
thinking that we'll expose this one low-level intrinsic in
`std::arch::x86_64`, and then if desired a crate on crates.io can build
`AtomicU128` from this API.

In any case this is all unstable regardless!
  • Loading branch information
alexcrichton committed Jan 2, 2019
1 parent abcdfe3 commit 98c489f
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 1 deletion.
74 changes: 74 additions & 0 deletions coresimd/x86_64/cmpxchg16b.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
use sync::atomic::Ordering;

#[cfg(test)]
use stdsimd_test::assert_instr;

/// Atomically compares and exchanges 16 bytes (128 bits) of memory.
///
/// This intrinsic maps to the `cmpxchg16b` instruction available on x86_64
/// processors. It is an atomic compare-and-swap: the memory behind `dst` is
/// overwritten with `new` only if the value currently stored there equals
/// `old`.
///
/// # Return value
///
/// The value that was stored at `dst` before the operation is returned. When
/// that value equals `old`, the memory has been updated to `new`.
///
/// # Memory Orderings
///
/// The `success` and `failure` orderings carry the same meaning as they do for
/// `AtomicUsize::compare_exchange`; the only difference is that this
/// operation acts on 16 bytes of memory rather than a pointer-sized value.
///
/// See the `compare_exchange` documentation of the `Atomic*` types in the
/// standard library for a full description of memory orderings.
///
/// # Unsafety
///
/// This function is unsafe because it dereferences a raw pointer: it reads,
/// and possibly writes, the memory at `dst`. In addition, `dst` must be
/// aligned to a 16-byte boundary.
///
/// The `cmpxchg16b` CPU feature must also be available at runtime. If the CPU
/// executing the binary does not support `cmpxchg16b` and the program enters
/// an execution path that would eventually reach this function, the behavior
/// is undefined.
///
/// Finally, `success` must be stronger than or equal to `failure`, otherwise
/// calling this function is undefined. See the `compare_exchange` function in
/// the `Atomic*` documentation for details. Every combination for which
/// `compare_exchange` would panic is undefined behavior here. Currently this
/// function aborts the process with an undefined instruction in that case.
#[inline]
#[cfg_attr(test, assert_instr(cmpxchg16b, success = Ordering::SeqCst, failure = Ordering::SeqCst))]
#[target_feature(enable = "cmpxchg16b")]
pub unsafe fn cmpxchg16b(
    dst: *mut u128,
    old: u128,
    new: u128,
    success: Ordering,
    failure: Ordering,
) -> u128 {
    use intrinsics;
    use sync::atomic::Ordering::*;

    debug_assert!(dst as usize % 16 == 0);

    // Each intrinsic yields a `(previous value, exchanged?)` pair. The arms are
    // grouped by failure ordering; the patterns are disjoint, so their order
    // does not affect which one is selected.
    let outcome = match (success, failure) {
        // Failure ordering: `Relaxed`.
        (Relaxed, Relaxed) => intrinsics::atomic_cxchg_relaxed(dst, old, new),
        (Acquire, Relaxed) => intrinsics::atomic_cxchg_acq_failrelaxed(dst, old, new),
        (Release, Relaxed) => intrinsics::atomic_cxchg_rel(dst, old, new),
        (AcqRel, Relaxed) => intrinsics::atomic_cxchg_acqrel_failrelaxed(dst, old, new),
        (SeqCst, Relaxed) => intrinsics::atomic_cxchg_failrelaxed(dst, old, new),

        // Failure ordering: `Acquire`.
        (Acquire, Acquire) => intrinsics::atomic_cxchg_acq(dst, old, new),
        (AcqRel, Acquire) => intrinsics::atomic_cxchg_acqrel(dst, old, new),
        (SeqCst, Acquire) => intrinsics::atomic_cxchg_failacq(dst, old, new),

        // Failure ordering: `SeqCst`.
        (SeqCst, SeqCst) => intrinsics::atomic_cxchg(dst, old, new),

        // This table of accepted pairs mirrors the one in libcore. libcore
        // panics on any other combination; here an undefined instruction is
        // executed instead, as a more lightweight stand-in for that panic.
        _ => ::coresimd::x86::ud2(),
    };

    // Only the previous value is reported; the success flag is discarded.
    outcome.0
}
3 changes: 3 additions & 0 deletions coresimd/x86_64/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,6 @@ pub use self::bswap::*;

mod rdrand;
pub use self::rdrand::*;

mod cmpxchg16b;
pub use self::cmpxchg16b::*;
5 changes: 5 additions & 0 deletions crates/coresimd/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
sse4a_target_feature,
arm_target_feature,
aarch64_target_feature,
cmpxchg16b_target_feature,
avx512_target_feature,
mips_target_feature,
powerpc_target_feature,
Expand Down Expand Up @@ -67,6 +68,8 @@
test(attr(allow(dead_code, deprecated, unused_variables, unused_mut)))
)]

#[macro_use]
#[allow(unused_imports)]
extern crate core as _core;
#[cfg(test)]
#[macro_use]
Expand Down Expand Up @@ -122,6 +125,8 @@ use _core::result;
#[allow(unused_imports)]
use _core::slice;
#[allow(unused_imports)]
use _core::sync;
#[allow(unused_imports)]
use _core::u128;
#[allow(unused_imports)]
use _core::u8;
5 changes: 4 additions & 1 deletion crates/stdsimd-test/src/disassembly.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ fn parse_objdump(output: &str) -> HashMap<String, Vec<Function>> {
.skip_while(|s| {
s.len() == expected_len
&& usize::from_str_radix(s, 16).is_ok()
}).map(|s| s.to_string())
})
.skip_while(|s| *s == "lock") // skip x86-specific prefix
.map(|s| s.to_string())
.collect::<Vec<String>>();
instructions.push(Instruction { parts });
}
Expand Down Expand Up @@ -198,6 +200,7 @@ fn parse_dumpbin(output: &str) -> HashMap<String, Vec<Function>> {
.skip_while(|s| {
s.len() == 2 && usize::from_str_radix(s, 16).is_ok()
}).map(|s| s.to_string())
.skip_while(|s| *s == "lock") // skip x86-specific prefix
.collect::<Vec<String>>();
instructions.push(Instruction { parts });
}
Expand Down
2 changes: 2 additions & 0 deletions crates/stdsimd-verify/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,9 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
"u16" => quote! { &U16 },
"u32" => quote! { &U32 },
"u64" => quote! { &U64 },
"u128" => quote! { &U128 },
"u8" => quote! { &U8 },
"Ordering" => quote! { &ORDERING },
"CpuidResult" => quote! { &CPUID },

// arm ...
Expand Down
5 changes: 5 additions & 0 deletions crates/stdsimd-verify/tests/x86-intel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ static I8: Type = Type::PrimSigned(8);
static U16: Type = Type::PrimUnsigned(16);
static U32: Type = Type::PrimUnsigned(32);
static U64: Type = Type::PrimUnsigned(64);
static U128: Type = Type::PrimUnsigned(128);
static U8: Type = Type::PrimUnsigned(8);
static ORDERING: Type = Type::Ordering;

static M64: Type = Type::M64;
static M128: Type = Type::M128;
Expand Down Expand Up @@ -75,6 +77,7 @@ enum Type {
Tuple,
CpuidResult,
Never,
Ordering,
}

stdsimd_verify::x86_functions!(static FUNCTIONS);
Expand Down Expand Up @@ -145,6 +148,8 @@ fn verify_all_signatures() {
"__cpuid_count" |
"__cpuid" |
"__get_cpuid_max" |
// Not listed with intel, but manually verified
"cmpxchg16b" |
// The UD2 intrinsic is not defined by Intel, but it was agreed on
// in the RFC Issue 2512:
// https://github.com/rust-lang/rfcs/issues/2512
Expand Down
6 changes: 6 additions & 0 deletions stdsimd/arch/detect/arch/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,10 @@ macro_rules! is_x86_feature_detected {
cfg!(target_feature = "xsavec") || $crate::arch::detect::check_for(
$crate::arch::detect::Feature::xsavec)
};
("cmpxchg16b") => {
cfg!(target_feature = "cmpxchg16b") || $crate::arch::detect::check_for(
$crate::arch::detect::Feature::cmpxchg16b)
};
($t:tt) => {
compile_error!(concat!("unknown target feature: ", $t))
};
Expand Down Expand Up @@ -316,4 +320,6 @@ pub enum Feature {
xsaves,
/// XSAVEC (Save Processor Extended States Compacted)
xsavec,
/// CMPXCHG16B, a 16-byte compare-and-swap instruction
cmpxchg16b,
}
6 changes: 6 additions & 0 deletions stdsimd/arch/detect/os/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ fn detect_features() -> cache::Initializer {

enable(proc_info_ecx, 0, Feature::sse3);
enable(proc_info_ecx, 9, Feature::ssse3);
enable(proc_info_ecx, 13, Feature::cmpxchg16b);
enable(proc_info_ecx, 19, Feature::sse4_1);
enable(proc_info_ecx, 20, Feature::sse4_2);
enable(proc_info_ecx, 23, Feature::popcnt);
Expand Down Expand Up @@ -288,6 +289,7 @@ mod tests {
println!("xsaveopt: {:?}", is_x86_feature_detected!("xsaveopt"));
println!("xsaves: {:?}", is_x86_feature_detected!("xsaves"));
println!("xsavec: {:?}", is_x86_feature_detected!("xsavec"));
println!("cmpxchg16b: {:?}", is_x86_feature_detected!("cmpxchg16b"));
}

#[test]
Expand Down Expand Up @@ -344,5 +346,9 @@ mod tests {
is_x86_feature_detected!("xsaves"),
information.xsaves_xrstors_and_ia32_xss()
);
assert_eq!(
is_x86_feature_detected!("cmpxchg16b"),
information.cmpxchg16b(),
);
}
}

0 comments on commit 98c489f

Please sign in to comment.