Skip to content

Commit

Permalink
xtensa: Support swap/CAS
Browse files Browse the repository at this point in the history
  • Loading branch information
taiki-e committed Oct 29, 2024
1 parent 47298a0 commit 83c148a
Show file tree
Hide file tree
Showing 7 changed files with 285 additions and 20 deletions.
3 changes: 3 additions & 0 deletions .github/.cspell/project-dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ DWCAS
espup
fild
fistp
getex
Halfword
IMAFD
inequal
Expand Down Expand Up @@ -56,6 +57,7 @@ mfcr
mfence
mgba
movd
movi
movlps
movq
movsd
Expand All @@ -74,6 +76,7 @@ rcpc
risbg
rsbegin
rsend
scompare
seqz
sete
shufps
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ Currently, x86, x86_64, Arm, AArch64, RISC-V, LoongArch64, MIPS32, MIPS64, Power
| avr \[4] (experimental) | isize,usize,i8,u8,i16,u16 |||
| hexagon \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 |||
| m68k \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 ||\[1] |
| xtensa \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 || |
| xtensa \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 || \[1] |

\[1] Arm's atomic RMW operations are not available on v6-m (thumbv6m). RISC-V's atomic RMW operations are not available on targets without the A (or G which means IMAFD) extension such as riscv32i, riscv32imc, etc. M68k's atomic RMW operations requires M68020+ (Linux is M68020+ by default).<br>
\[1] Arm's atomic RMW operations are not available on v6-m (thumbv6m). RISC-V's atomic RMW operations are not available on targets without the A (or G which means IMAFD) extension such as riscv32i, riscv32imc, etc. M68k's atomic RMW operations require M68020+ (Linux is M68020+ by default). Xtensa's atomic RMW operations are not available on esp32s2.<br>
\[2] Armv6+ or Linux/Android, except for M-profile architecture such as thumbv6m, thumbv7m, etc.<br>
\[3] Requires Rust 1.72+.<br>
\[4] Requires nightly due to `#![feature(asm_experimental_arch)]`.<br>
Expand Down
12 changes: 12 additions & 0 deletions src/arch/cfgs/xtensa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,22 @@ macro_rules! cfg_has_atomic_128 {
macro_rules! cfg_no_atomic_128 {
($($tt:tt)*) => { $($tt)* };
}
// Variant used when the `s32c1i` target feature is enabled:
// the tokens passed to the macro are emitted unchanged.
#[cfg(target_feature = "s32c1i")]
#[macro_export]
macro_rules! cfg_has_atomic_cas {
($($tt:tt)*) => { $($tt)* };
}
// Variant used when the `s32c1i` target feature is enabled:
// the tokens passed to the macro are discarded (expands to nothing).
#[cfg(target_feature = "s32c1i")]
#[macro_export]
macro_rules! cfg_no_atomic_cas {
($($tt:tt)*) => {};
}
// Variant used when the `s32c1i` target feature is NOT enabled:
// the tokens passed to the macro are discarded (expands to nothing).
#[cfg(not(target_feature = "s32c1i"))]
#[macro_export]
macro_rules! cfg_has_atomic_cas {
($($tt:tt)*) => {};
}
#[cfg(not(target_feature = "s32c1i"))]
#[macro_export]
macro_rules! cfg_no_atomic_cas {
($($tt:tt)*) => { $($tt)* };
Expand Down
275 changes: 259 additions & 16 deletions src/arch/xtensa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,28 @@
Xtensa
Refs:
- Xtensa Instruction Set Architecture (ISA) Reference Manual https://web.archive.org/web/20241005102231/https://0x04.net/~mwk/doc/xtensa.pdf
- Xtensa Instruction Set Architecture (ISA) Summary for all Xtensa LX Processors
https://www.cadence.com/content/dam/cadence-www/global/en_US/documents/tools/silicon-solutions/compute-ip/isa-summary.pdf
- https://github.com/espressif/llvm-project/blob/xtensa_release_18.1.2/llvm/test/CodeGen/Xtensa/atomic-load-store.ll
- https://github.com/espressif/llvm-project/blob/xtensa_release_18.1.2/llvm/test/CodeGen/Xtensa/atomicrmw.ll
Note that LLVM's codegen for sub-word atomics seems to be suboptimal (extra branch, memw, l32i.n, etc.).
Note that l32ai (acquire load), s32ri (release store), and l32ex/s32ex/getex (LL/SC) are not yet supported in LLVM.
https://github.com/espressif/llvm-project/blob/xtensa_release_18.1.2/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
*/

#[path = "cfgs/xtensa.rs"]
mod cfgs;

use core::{arch::asm, mem::MaybeUninit, sync::atomic::Ordering};

#[cfg(target_feature = "s32c1i")]
use crate::raw::{AtomicCompareExchange, AtomicSwap};
use crate::raw::{AtomicLoad, AtomicStore};

macro_rules! atomic {
($int_type:ident, $asm_size:tt, $asm_suffix:tt, $asm_load_ext:tt) => {
#[rustfmt::skip]
macro_rules! atomic_load_store {
($int_type:ident, $bits:tt, $narrow:tt, $unsigned:tt) => {
impl AtomicLoad for $int_type {
#[inline]
unsafe fn atomic_load(
Expand All @@ -30,8 +39,8 @@ macro_rules! atomic {
macro_rules! atomic_load {
($acquire:tt) => {
asm!(
concat!("l", $asm_size, $asm_load_ext, "i", $asm_suffix, " {out}, {src}, 0"),
$acquire,
concat!("l", $bits, $unsigned, "i", $narrow, " {out}, {src}, 0"), // atomic { out = *src }
$acquire, // fence
src = in(reg) ptr_reg!(src),
out = lateout(reg) out,
options(nostack, preserves_flags),
Expand Down Expand Up @@ -59,9 +68,9 @@ macro_rules! atomic {
macro_rules! atomic_store {
($acquire:tt, $release:tt) => {
asm!(
$release,
concat!("s", $asm_size, "i", $asm_suffix, " {val}, {dst}, 0"),
$acquire,
$release, // fence
concat!("s", $bits, "i", $narrow, " {val}, {dst}, 0"), // atomic { *dst = val }
$acquire, // fence
dst = in(reg) ptr_reg!(dst),
val = in(reg) val,
options(nostack, preserves_flags),
Expand All @@ -80,11 +89,245 @@ macro_rules! atomic {
};
}

atomic!(i8, "8", "", "u");
atomic!(u8, "8", "", "u");
atomic!(i16, "16", "", "u");
atomic!(u16, "16", "", "u");
atomic!(i32, "32", ".n", "");
atomic!(u32, "32", ".n", "");
atomic!(isize, "32", ".n", "");
atomic!(usize, "32", ".n", "");
// Implements the raw atomic traits for a word-sized integer type.
//
// - Load/store are generated by `atomic_load_store!` with the "32"/".n" pieces,
//   i.e. the `l32i.n` / `s32i.n` instructions.
// - Swap and compare-exchange are only generated when the `s32c1i` target
//   feature is enabled, and are built on `wsr ..., scompare1` + `s32c1i`.
//
// In the inner helper macros, `$release` is emitted before the memory access
// and `$acquire` after it; each is either "" (no fence) or "memw".
#[rustfmt::skip]
macro_rules! atomic {
($int_type:ident) => {
atomic_load_store!($int_type, "32", ".n", "");
#[cfg(target_feature = "s32c1i")]
impl AtomicSwap for $int_type {
// Atomically replaces `*dst` with `val` and returns the previous value.
#[inline]
unsafe fn atomic_swap(
dst: *mut MaybeUninit<Self>,
val: MaybeUninit<Self>,
order: Ordering,
) -> MaybeUninit<Self> {
let out: MaybeUninit<Self>;

// SAFETY: the caller must uphold the safety contract.
unsafe {
macro_rules! atomic_swap {
($acquire:tt, $release:tt) => {
// CAS loop: load the current value once, then keep attempting
// `s32c1i` with the last observed value as the expected value
// until the value it reads back matches that expectation.
asm!(
$release, // fence
"l32i.n {out}, {dst}, 0", // atomic { out = *dst }
"2:",
"mov.n {tmp}, {out}", // tmp = out
"wsr {tmp}, scompare1", // scompare1 = tmp
"mov.n {out}, {val}", // out = val
"s32c1i {out}, {dst}, 0", // atomic { _x = *dst; if _x == scompare1 { *dst = out }; out = _x }
"bne {tmp}, {out}, 2b", // if tmp != out { jump '2 }
$acquire, // fence
dst = in(reg) ptr_reg!(dst),
val = in(reg) val,
out = out(reg) out,
// Scratch register holding the expected (previously observed) value.
tmp = out(reg) _,
// Written by `wsr` above, so it must be declared as clobbered.
out("scompare1") _,
options(nostack, preserves_flags),
)
};
}
// Arguments are (acquire, release): Acquire puts `memw` after the loop,
// Release puts it before, AcqRel/SeqCst use both.
match order {
Ordering::Relaxed => atomic_swap!("", ""),
Ordering::Acquire => atomic_swap!("memw", ""),
Ordering::Release => atomic_swap!("", "memw"),
Ordering::AcqRel | Ordering::SeqCst => atomic_swap!("memw", "memw"),
_ => unreachable!(),
}
}
out
}
}
#[cfg(target_feature = "s32c1i")]
impl AtomicCompareExchange for $int_type {
// Stores `new` into `*dst` if the current value equals `old`.
// Returns the value read from `*dst` and whether the comparison succeeded.
// This is a single `s32c1i` attempt; there is no retry loop.
#[inline]
unsafe fn atomic_compare_exchange(
dst: *mut MaybeUninit<Self>,
old: MaybeUninit<Self>,
new: MaybeUninit<Self>,
success: Ordering,
failure: Ordering,
) -> (MaybeUninit<Self>, bool) {
// NOTE(review): presumably combines `success` and `failure` into one ordering
// that is at least as strong as both — confirm in crate::utils.
let order = crate::utils::upgrade_success_ordering(success, failure);
let out: MaybeUninit<Self>;

// SAFETY: the caller must uphold the safety contract.
unsafe {
// Success flag: starts as 1 and is only overwritten with 0 by the
// `movi` below, which is skipped when the value read equals `old`.
let mut r: u32 = 1;
macro_rules! atomic_cmpxchg {
($acquire:tt, $release:tt) => {
asm!(
$release, // fence
"wsr {old}, scompare1", // scompare1 = old
"s32c1i {out}, {dst}, 0", // atomic { _x = *dst; if _x == scompare1 { *dst = out }; out = _x }
$acquire, // fence
"beq {old}, {out}, 2f", // if old == out { jump '2 }
"movi {r}, 0", // r = 0
"2:",
dst = in(reg) ptr_reg!(dst),
old = in(reg) old,
// The same register carries `new` in and the previous value out.
out = inout(reg) new => out,
r = inout(reg) r,
// Written by `wsr` above, so it must be declared as clobbered.
out("scompare1") _,
options(nostack, preserves_flags),
)
};
}
// Arguments are (acquire, release), see the macro definition above.
match order {
Ordering::Relaxed => atomic_cmpxchg!("", ""),
Ordering::Acquire => atomic_cmpxchg!("memw", ""),
Ordering::Release => atomic_cmpxchg!("", "memw"),
Ordering::AcqRel | Ordering::SeqCst => atomic_cmpxchg!("memw", "memw"),
_ => unreachable!(),
}
crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
(out, r != 0)
}
}
}
};
}

// Implements the raw atomic traits for a sub-word integer type
// (`$bits` is the access width used in the load/store mnemonics, e.g. "8" or "16").
//
// - Load/store are generated by `atomic_load_store!` with the "u" load extension
//   (e.g. `l8ui` / `s8i`).
// - Swap and compare-exchange are only generated when the `s32c1i` target
//   feature is enabled. They operate on a 32-bit word with a word-sized CAS loop,
//   using `shift`/`mask` to select the bits that belong to the sub-word value.
//
// In the inner helper macros, `$release` is emitted before the memory access
// and `$acquire` at the end of the sequence; each is either "" or "memw".
#[rustfmt::skip]
macro_rules! atomic_sub_word {
($int_type:ident, $bits:tt) => {
atomic_load_store!($int_type, $bits, "", "u");
#[cfg(target_feature = "s32c1i")]
impl AtomicSwap for $int_type {
// Atomically replaces `*dst` with `val` and returns the previous value.
#[inline]
unsafe fn atomic_swap(
dst: *mut MaybeUninit<Self>,
val: MaybeUninit<Self>,
order: Ordering,
) -> MaybeUninit<Self> {
// NOTE(review): presumably `dst` becomes the address of the containing 32-bit
// word, `shift` the bit offset of the value inside it, and `mask` the bits it
// occupies — confirm against create_sub_word_mask_values.
let (dst, shift, mask) = crate::utils::create_sub_word_mask_values(dst);
let out: MaybeUninit<Self>;

// SAFETY: the caller must uphold the safety contract.
unsafe {
macro_rules! atomic_swap {
($acquire:tt, $release:tt) => {
// Implement sub-word atomic operations using word-sized CAS loop.
// See also create_sub_word_mask_values.
asm!(
"ssl {shift}", // sar = 32 - shift
"sll {val}, {val}", // val <<= shift
$release, // fence
"l32i.n {out}, {dst}, 0", // atomic { out = *dst }
"2:",
"mov.n {tmp}, {out}", // tmp = out
"wsr {tmp}, scompare1", // scompare1 = tmp
"and {out}, {out}, {inv_mask}", // out &= inv_mask
"or {out}, {out}, {val}", // out |= val
"s32c1i {out}, {dst}, 0", // atomic { _x = *dst; if _x == scompare1 { *dst = out }; out = _x }
"bne {tmp}, {out}, 2b", // if tmp != out { jump '2 }
"and {out}, {out}, {mask}", // out &= mask
"ssr {shift}", // sar = shift
"srl {out}, {out}", // out >>= shift
$acquire, // fence
dst = in(reg) ptr_reg!(dst),
// `val` is shifted in place by the asm, so it is an inout operand
// whose final value is discarded.
val = inout(reg) crate::utils::ZeroExtend::zero_extend(val) => _,
out = out(reg) out,
shift = in(reg) shift,
mask = in(reg) mask,
inv_mask = in(reg) !mask,
// Scratch register holding the expected (previously observed) word.
tmp = out(reg) _,
// Written by `wsr` / `ssl` / `ssr` above, so both are declared as clobbered.
out("scompare1") _,
out("sar") _,
options(nostack, preserves_flags),
)
};
}
// Arguments are (acquire, release), see the macro definition above.
match order {
Ordering::Relaxed => atomic_swap!("", ""),
Ordering::Acquire => atomic_swap!("memw", ""),
Ordering::Release => atomic_swap!("", "memw"),
Ordering::AcqRel | Ordering::SeqCst => atomic_swap!("memw", "memw"),
_ => unreachable!(),
}
}
out
}
}
#[cfg(target_feature = "s32c1i")]
impl AtomicCompareExchange for $int_type {
// Stores `new` into `*dst` if the current value equals `old`.
// Returns the value read from `*dst` and whether the exchange succeeded.
#[inline]
unsafe fn atomic_compare_exchange(
dst: *mut MaybeUninit<Self>,
old: MaybeUninit<Self>,
new: MaybeUninit<Self>,
success: Ordering,
failure: Ordering,
) -> (MaybeUninit<Self>, bool) {
// NOTE(review): presumably combines `success` and `failure` into one ordering
// that is at least as strong as both — confirm in crate::utils.
let order = crate::utils::upgrade_success_ordering(success, failure);
// NOTE(review): presumably (containing word address, bit shift, bit mask) —
// confirm against create_sub_word_mask_values.
let (dst, shift, mask) = crate::utils::create_sub_word_mask_values(dst);
let out: MaybeUninit<Self>;

// SAFETY: the caller must uphold the safety contract.
unsafe {
// Success flag written by the asm through the `tmp` operand: 1 or 0.
let mut r: u32;
macro_rules! atomic_cmpxchg {
($acquire:tt, $release:tt) => {
// Implement sub-word atomic operations using word-sized CAS loop.
// See also create_sub_word_mask_values.
//
// The loop retries only when the word-sized CAS fails (the word
// changed between the read and `s32c1i`); each retry re-checks
// the masked bits against `old` and exits via '3 if they differ.
asm!(
"ssl {shift}", // sar = 32 - shift
"sll {old}, {old}", // old <<= shift
"sll {new}, {new}", // new <<= shift
$release, // fence
"l32i.n {prev}, {dst}, 0", // atomic { prev = *dst }
"and {out}, {prev}, {mask}", // out = prev & mask
"2:",
"bne {out}, {old}, 3f", // if out != old { jump '3 }
"mov.n {tmp}, {prev}", // tmp = prev
"wsr {tmp}, scompare1", // scompare1 = tmp
"and {prev}, {prev}, {inv_mask}", // prev &= inv_mask
"or {prev}, {prev}, {new}", // prev |= new
"s32c1i {prev}, {dst}, 0", // atomic { _x = *dst; if _x == scompare1 { *dst = prev }; prev = _x }
"and {out}, {prev}, {mask}", // out = prev & mask
"bne {tmp}, {prev}, 2b", // if tmp != prev { jump '2 }
"movi {tmp}, 1", // tmp = 1
"j 4f", // jump '4
"3:",
"movi {tmp}, 0", // tmp = 0
"4:",
"ssr {shift}", // sar = shift
"srl {out}, {out}", // out >>= shift
$acquire, // fence
dst = in(reg) ptr_reg!(dst),
// `old` and `new` are shifted in place by the asm, so they are
// inout operands whose final values are discarded.
old = inout(reg) crate::utils::ZeroExtend::zero_extend(old) => _,
new = inout(reg) crate::utils::ZeroExtend::zero_extend(new) => _,
out = out(reg) out,
shift = in(reg) shift,
mask = in(reg) mask,
inv_mask = in(reg) !mask,
// `tmp` first holds the expected word for the CAS, then is reused
// to return the success flag in `r`.
tmp = out(reg) r,
prev = out(reg) _,
// Written by `wsr` / `ssl` / `ssr` above, so both are declared as clobbered.
out("scompare1") _,
out("sar") _,
options(nostack, preserves_flags),
)
};
}
// Arguments are (acquire, release), see the macro definition above.
match order {
Ordering::Relaxed => atomic_cmpxchg!("", ""),
Ordering::Acquire => atomic_cmpxchg!("memw", ""),
Ordering::Release => atomic_cmpxchg!("", "memw"),
Ordering::AcqRel | Ordering::SeqCst => atomic_cmpxchg!("memw", "memw"),
_ => unreachable!(),
}
crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
(out, r != 0)
}
}
}
};
}

// 8- and 16-bit types use the sub-word implementation (masked word-sized CAS loop).
atomic_sub_word!(i8, "8");
atomic_sub_word!(u8, "8");
atomic_sub_word!(i16, "16");
atomic_sub_word!(u16, "16");
// Types passed to `atomic!` use the 32-bit implementation directly.
atomic!(i32);
atomic!(u32);
atomic!(isize);
atomic!(usize);
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ Currently, x86, x86_64, Arm, AArch64, RISC-V, LoongArch64, MIPS32, MIPS64, Power
| avr \[4] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ |
| hexagon \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ |
| m68k \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] |
| xtensa \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | |
| xtensa \[4] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] |
\[1] Arm's atomic RMW operations are not available on v6-m (thumbv6m). RISC-V's atomic RMW operations are not available on targets without the A (or G which means IMAFD) extension such as riscv32i, riscv32imc, etc. M68k's atomic RMW operations requires M68020+ (Linux is M68020+ by default).<br>
\[1] Arm's atomic RMW operations are not available on v6-m (thumbv6m). RISC-V's atomic RMW operations are not available on targets without the A (or G which means IMAFD) extension such as riscv32i, riscv32imc, etc. M68k's atomic RMW operations require M68020+ (Linux is M68020+ by default). Xtensa's atomic RMW operations are not available on esp32s2.<br>
\[2] Armv6+ or Linux/Android, except for M-profile architecture such as thumbv6m, thumbv7m, etc.<br>
\[3] Requires Rust 1.72+.<br>
\[4] Requires nightly due to `#![feature(asm_experimental_arch)]`.<br>
Expand Down
1 change: 1 addition & 0 deletions tests/xtensa/.cargo/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ runner = "wokwi-server --chip esp32"
[target.xtensa-esp32s2-none-elf]
runner = "wokwi-server --chip esp32s2"
[target.xtensa-esp32s3-none-elf]
# Note: https://github.com/MabezDev/wokwi-server/pull/16 is not yet released
runner = "wokwi-server --chip esp32s3"
Loading

0 comments on commit 83c148a

Please sign in to comment.