From 83c148a113af0faa17ed7c7771a42758e8157551 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Wed, 30 Oct 2024 01:31:35 +0900 Subject: [PATCH] xtensa: Support swap/CAS --- .github/.cspell/project-dictionary.txt | 3 + README.md | 4 +- src/arch/cfgs/xtensa.rs | 12 ++ src/arch/xtensa.rs | 275 +++++++++++++++++++++++-- src/lib.rs | 4 +- tests/xtensa/.cargo/config.toml | 1 + tests/xtensa/src/main.rs | 6 + 7 files changed, 285 insertions(+), 20 deletions(-) diff --git a/.github/.cspell/project-dictionary.txt b/.github/.cspell/project-dictionary.txt index 1e61e887..2ba1b7ba 100644 --- a/.github/.cspell/project-dictionary.txt +++ b/.github/.cspell/project-dictionary.txt @@ -23,6 +23,7 @@ DWCAS espup fild fistp +getex Halfword IMAFD inequal @@ -56,6 +57,7 @@ mfcr mfence mgba movd +movi movlps movq movsd @@ -74,6 +76,7 @@ rcpc risbg rsbegin rsend +scompare seqz sete shufps diff --git a/README.md b/README.md index ed1e70a5..7250cad5 100644 --- a/README.md +++ b/README.md @@ -42,9 +42,9 @@ Currently, x86, x86_64, Arm, AArch64, RISC-V, LoongArch64, MIPS32, MIPS64, Power | avr \[4] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | | hexagon \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | | m68k \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| xtensa \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | | +| xtensa \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -\[1] Arm's atomic RMW operations are not available on v6-m (thumbv6m). RISC-V's atomic RMW operations are not available on targets without the A (or G which means IMAFD) extension such as riscv32i, riscv32imc, etc. M68k's atomic RMW operations requires M68020+ (Linux is M68020+ by default).
+\[1] Arm's atomic RMW operations are not available on v6-m (thumbv6m). RISC-V's atomic RMW operations are not available on targets without the A (or G which means IMAFD) extension such as riscv32i, riscv32imc, etc. M68k's atomic RMW operations require M68020+ (Linux is M68020+ by default). Xtensa's atomic RMW operations are not available on esp32s2.
\[2] Armv6+ or Linux/Android, except for M-profile architecture such as thumbv6m, thumbv7m, etc.
\[3] Requires Rust 1.72+.
\[4] Requires nightly due to `#![feature(asm_experimental_arch)]`.
diff --git a/src/arch/cfgs/xtensa.rs b/src/arch/cfgs/xtensa.rs index dde8e6d6..7bf8ec7b 100644 --- a/src/arch/cfgs/xtensa.rs +++ b/src/arch/cfgs/xtensa.rs @@ -42,10 +42,22 @@ macro_rules! cfg_has_atomic_128 { macro_rules! cfg_no_atomic_128 { ($($tt:tt)*) => { $($tt)* }; } +#[cfg(target_feature = "s32c1i")] +#[macro_export] +macro_rules! cfg_has_atomic_cas { + ($($tt:tt)*) => { $($tt)* }; +} +#[cfg(target_feature = "s32c1i")] +#[macro_export] +macro_rules! cfg_no_atomic_cas { + ($($tt:tt)*) => {}; +} +#[cfg(not(target_feature = "s32c1i"))] #[macro_export] macro_rules! cfg_has_atomic_cas { ($($tt:tt)*) => {}; } +#[cfg(not(target_feature = "s32c1i"))] #[macro_export] macro_rules! cfg_no_atomic_cas { ($($tt:tt)*) => { $($tt)* }; diff --git a/src/arch/xtensa.rs b/src/arch/xtensa.rs index ed574871..d9129448 100644 --- a/src/arch/xtensa.rs +++ b/src/arch/xtensa.rs @@ -4,8 +4,14 @@ Xtensa Refs: -- Xtensa Instruction Set Architecture (ISA) Reference Manual https://web.archive.org/web/20241005102231/https://0x04.net/~mwk/doc/xtensa.pdf +- Xtensa Instruction Set Architecture (ISA) Summary for all Xtensa LX Processors + https://www.cadence.com/content/dam/cadence-www/global/en_US/documents/tools/silicon-solutions/compute-ip/isa-summary.pdf - https://github.com/espressif/llvm-project/blob/xtensa_release_18.1.2/llvm/test/CodeGen/Xtensa/atomic-load-store.ll +- https://github.com/espressif/llvm-project/blob/xtensa_release_18.1.2/llvm/test/CodeGen/Xtensa/atomicrmw.ll + Note that LLVM's codegen for sub-word atomics seem to be suboptimal (extra branch, memw, l32i.n, etc.). + +Note that l32ai (acquire load), s32ri (release store), and l32ex/s32ex/getex (LL/SC) are not yet supported in LLVM. 
+https://github.com/espressif/llvm-project/blob/xtensa_release_18.1.2/llvm/lib/Target/Xtensa/XtensaInstrInfo.td */ #[path = "cfgs/xtensa.rs"] @@ -13,10 +19,13 @@ mod cfgs; use core::{arch::asm, mem::MaybeUninit, sync::atomic::Ordering}; +#[cfg(target_feature = "s32c1i")] +use crate::raw::{AtomicCompareExchange, AtomicSwap}; use crate::raw::{AtomicLoad, AtomicStore}; -macro_rules! atomic { - ($int_type:ident, $asm_size:tt, $asm_suffix:tt, $asm_load_ext:tt) => { +#[rustfmt::skip] +macro_rules! atomic_load_store { + ($int_type:ident, $bits:tt, $narrow:tt, $unsigned:tt) => { impl AtomicLoad for $int_type { #[inline] unsafe fn atomic_load( @@ -30,8 +39,8 @@ macro_rules! atomic { macro_rules! atomic_load { ($acquire:tt) => { asm!( - concat!("l", $asm_size, $asm_load_ext, "i", $asm_suffix, " {out}, {src}, 0"), - $acquire, + concat!("l", $bits, $unsigned, "i", $narrow, " {out}, {src}, 0"), // atomic { out = *src } + $acquire, // fence src = in(reg) ptr_reg!(src), out = lateout(reg) out, options(nostack, preserves_flags), @@ -59,9 +68,9 @@ macro_rules! atomic { macro_rules! atomic_store { ($acquire:tt, $release:tt) => { asm!( - $release, - concat!("s", $asm_size, "i", $asm_suffix, " {val}, {dst}, 0"), - $acquire, + $release, // fence + concat!("s", $bits, "i", $narrow, " {val}, {dst}, 0"), // atomic { *dst = val } + $acquire, // fence dst = in(reg) ptr_reg!(dst), val = in(reg) val, options(nostack, preserves_flags), @@ -80,11 +89,245 @@ macro_rules! atomic { }; } -atomic!(i8, "8", "", "u"); -atomic!(u8, "8", "", "u"); -atomic!(i16, "16", "", "u"); -atomic!(u16, "16", "", "u"); -atomic!(i32, "32", ".n", ""); -atomic!(u32, "32", ".n", ""); -atomic!(isize, "32", ".n", ""); -atomic!(usize, "32", ".n", ""); +#[rustfmt::skip] +macro_rules! 
atomic { + ($int_type:ident) => { + atomic_load_store!($int_type, "32", ".n", ""); + #[cfg(target_feature = "s32c1i")] + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + let out: MaybeUninit; + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_swap { + ($acquire:tt, $release:tt) => { + asm!( + $release, // fence + "l32i.n {out}, {dst}, 0", // atomic { out = *dst } + "2:", + "mov.n {tmp}, {out}", // tmp = out + "wsr {tmp}, scompare1", // scompare1 = tmp + "mov.n {out}, {val}", // out = val + "s32c1i {out}, {dst}, 0", // atomic { _x = *dst; if _x == scompare1 { *dst = out }; out = _x } + "bne {tmp}, {out}, 2b", // if tmp != out { jump '2 } + $acquire, // fence + dst = in(reg) ptr_reg!(dst), + val = in(reg) val, + out = out(reg) out, + tmp = out(reg) _, + out("scompare1") _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_swap!("", ""), + Ordering::Acquire => atomic_swap!("memw", ""), + Ordering::Release => atomic_swap!("", "memw"), + Ordering::AcqRel | Ordering::SeqCst => atomic_swap!("memw", "memw"), + _ => unreachable!(), + } + } + out + } + } + #[cfg(target_feature = "s32c1i")] + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + let order = crate::utils::upgrade_success_ordering(success, failure); + let out: MaybeUninit; + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: u32 = 1; + macro_rules! 
atomic_cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + $release, // fence + "wsr {old}, scompare1", // scompare1 = old + "s32c1i {out}, {dst}, 0", // atomic { _x = *dst; if _x == scompare1 { *dst = out }; out = _x } + $acquire, // fence + "beq {old}, {out}, 2f", // if old == out { jump '2 } + "movi {r}, 0", // r = 0 + "2:", + dst = in(reg) ptr_reg!(dst), + old = in(reg) old, + out = inout(reg) new => out, + r = inout(reg) r, + out("scompare1") _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_cmpxchg!("", ""), + Ordering::Acquire => atomic_cmpxchg!("memw", ""), + Ordering::Release => atomic_cmpxchg!("", "memw"), + Ordering::AcqRel | Ordering::SeqCst => atomic_cmpxchg!("memw", "memw"), + _ => unreachable!(), + } + crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test + (out, r != 0) + } + } + } + }; +} + +#[rustfmt::skip] +macro_rules! atomic_sub_word { + ($int_type:ident, $bits:tt) => { + atomic_load_store!($int_type, $bits, "", "u"); + #[cfg(target_feature = "s32c1i")] + impl AtomicSwap for $int_type { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + let (dst, shift, mask) = crate::utils::create_sub_word_mask_values(dst); + let out: MaybeUninit; + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_swap { + ($acquire:tt, $release:tt) => { + // Implement sub-word atomic operations using word-sized CAS loop. + // See also create_sub_word_mask_values. 
+ asm!( + "ssl {shift}", // sar = 32 - shift + "sll {val}, {val}", // val <<= shift + $release, // fence + "l32i.n {out}, {dst}, 0", // atomic { out = *dst } + "2:", + "mov.n {tmp}, {out}", // tmp = out + "wsr {tmp}, scompare1", // scompare1 = tmp + "and {out}, {out}, {inv_mask}", // out &= inv_mask + "or {out}, {out}, {val}", // out |= val + "s32c1i {out}, {dst}, 0", // atomic { _x = *dst; if _x == scompare1 { *dst = out }; out = _x } + "bne {tmp}, {out}, 2b", // if tmp != out { jump '2 } + "and {out}, {out}, {mask}", // out &= mask + "ssr {shift}", // sar = shift + "srl {out}, {out}", // out >>= shift + $acquire, // fence + dst = in(reg) ptr_reg!(dst), + val = inout(reg) crate::utils::ZeroExtend::zero_extend(val) => _, + out = out(reg) out, + shift = in(reg) shift, + mask = in(reg) mask, + inv_mask = in(reg) !mask, + tmp = out(reg) _, + out("scompare1") _, + out("sar") _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_swap!("", ""), + Ordering::Acquire => atomic_swap!("memw", ""), + Ordering::Release => atomic_swap!("", "memw"), + Ordering::AcqRel | Ordering::SeqCst => atomic_swap!("memw", "memw"), + _ => unreachable!(), + } + } + out + } + } + #[cfg(target_feature = "s32c1i")] + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + let order = crate::utils::upgrade_success_ordering(success, failure); + let (dst, shift, mask) = crate::utils::create_sub_word_mask_values(dst); + let out: MaybeUninit; + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut r: u32; + macro_rules! atomic_cmpxchg { + ($acquire:tt, $release:tt) => { + // Implement sub-word atomic operations using word-sized CAS loop. + // See also create_sub_word_mask_values. 
+ asm!( + "ssl {shift}", // sar = 32 - shift + "sll {old}, {old}", // old <<= shift + "sll {new}, {new}", // new <<= shift + $release, // fence + "l32i.n {prev}, {dst}, 0", // atomic { prev = *dst } + "and {out}, {prev}, {mask}", // out = prev & mask + "2:", + "bne {out}, {old}, 3f", // if out != old { jump '3 } + "mov.n {tmp}, {prev}", // tmp = prev + "wsr {tmp}, scompare1", // scompare1 = tmp + "and {prev}, {prev}, {inv_mask}", // prev &= inv_mask + "or {prev}, {prev}, {new}", // prev |= new + "s32c1i {prev}, {dst}, 0", // atomic { _x = *dst; if _x == scompare1 { *dst = prev }; prev = _x } + "and {out}, {prev}, {mask}", // out = prev & mask + "bne {tmp}, {prev}, 2b", // if tmp != prev { jump '2 } + "movi {tmp}, 1", // tmp = 1 + "j 4f", // jump '4 + "3:", + "movi {tmp}, 0", // tmp = 0 + "4:", + "ssr {shift}", // sar = shift + "srl {out}, {out}", // out >>= shift + $acquire, // fence + dst = in(reg) ptr_reg!(dst), + old = inout(reg) crate::utils::ZeroExtend::zero_extend(old) => _, + new = inout(reg) crate::utils::ZeroExtend::zero_extend(new) => _, + out = out(reg) out, + shift = in(reg) shift, + mask = in(reg) mask, + inv_mask = in(reg) !mask, + tmp = out(reg) r, + prev = out(reg) _, + out("scompare1") _, + out("sar") _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_cmpxchg!("", ""), + Ordering::Acquire => atomic_cmpxchg!("memw", ""), + Ordering::Release => atomic_cmpxchg!("", "memw"), + Ordering::AcqRel | Ordering::SeqCst => atomic_cmpxchg!("memw", "memw"), + _ => unreachable!(), + } + crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test + (out, r != 0) + } + } + } + }; +} + +atomic_sub_word!(i8, "8"); +atomic_sub_word!(u8, "8"); +atomic_sub_word!(i16, "16"); +atomic_sub_word!(u16, "16"); +atomic!(i32); +atomic!(u32); +atomic!(isize); +atomic!(usize); diff --git a/src/lib.rs b/src/lib.rs index c8570550..3a86cc6b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -36,9 +36,9 @@ Currently, x86, 
x86_64, Arm, AArch64, RISC-V, LoongArch64, MIPS32, MIPS64, Power | avr \[4] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | | hexagon \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | | m68k \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| xtensa \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | | +| xtensa \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -\[1] Arm's atomic RMW operations are not available on v6-m (thumbv6m). RISC-V's atomic RMW operations are not available on targets without the A (or G which means IMAFD) extension such as riscv32i, riscv32imc, etc. M68k's atomic RMW operations requires M68020+ (Linux is M68020+ by default).
+\[1] Arm's atomic RMW operations are not available on v6-m (thumbv6m). RISC-V's atomic RMW operations are not available on targets without the A (or G which means IMAFD) extension such as riscv32i, riscv32imc, etc. M68k's atomic RMW operations require M68020+ (Linux is M68020+ by default). Xtensa's atomic RMW operations are not available on esp32s2.
\[2] Armv6+ or Linux/Android, except for M-profile architecture such as thumbv6m, thumbv7m, etc.
\[3] Requires Rust 1.72+.
\[4] Requires nightly due to `#![feature(asm_experimental_arch)]`.
diff --git a/tests/xtensa/.cargo/config.toml b/tests/xtensa/.cargo/config.toml index 464a006e..46d6780f 100644 --- a/tests/xtensa/.cargo/config.toml +++ b/tests/xtensa/.cargo/config.toml @@ -3,4 +3,5 @@ runner = "wokwi-server --chip esp32" [target.xtensa-esp32s2-none-elf] runner = "wokwi-server --chip esp32s2" [target.xtensa-esp32s3-none-elf] +# Note: https://github.com/MabezDev/wokwi-server/pull/16 is not yet released runner = "wokwi-server --chip esp32s3" diff --git a/tests/xtensa/src/main.rs b/tests/xtensa/src/main.rs index f9cbe8cc..da5ff0eb 100644 --- a/tests/xtensa/src/main.rs +++ b/tests/xtensa/src/main.rs @@ -161,6 +161,12 @@ fn main() -> ! { }; } + cfg_has_atomic_cas! { + println!("target_has_cas: true"); + } + cfg_no_atomic_cas! { + println!("target_has_cas: false"); + } test_atomic!(isize); test_atomic!(usize); test_atomic!(i8);