-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
LoongArch: Remove redundant barrier instructions before LL-SC loops
This is isomorphic to the LLVM changes [1-2]. On LoongArch, the LL and SC instructions have memory barrier semantics: - LL: <memory-barrier> + <load-exclusive> - SC: <store-conditional> + <memory-barrier> But the compare and swap operation is allowed to fail, and if it fails the SC instruction is not executed, thus the guarantee of acquire semantics cannot be ensured. Therefore, an acquire barrier needs to be generated when failure_memorder includes an acquire operation. On CPUs implementing LoongArch v1.10 or later, "dbar 0b10100" is an acquire barrier; on CPUs implementing LoongArch v1.00, it is a full barrier. So it's always enough for acquire semantics. OTOH if acquire semantics are not needed, we still need the "dbar 0x700" as the load-load barrier like all LL-SC loops. [1]:llvm/llvm-project#67391 [2]:llvm/llvm-project#69339 gcc/ChangeLog: * config/loongarch/loongarch.cc (loongarch_memmodel_needs_release_fence): Remove. (loongarch_cas_failure_memorder_needs_acquire): New static function. (loongarch_print_operand): Redefine 'G' for the barrier on CAS failure. * config/loongarch/sync.md (atomic_cas_value_strong<mode>): Remove the redundant barrier before the LL instruction, and emit an acquire barrier on failure if needed by failure_memorder. (atomic_cas_value_cmp_and_7_<mode>): Likewise. (atomic_cas_value_add_7_<mode>): Remove the unnecessary barrier before the LL instruction. (atomic_cas_value_sub_7_<mode>): Likewise. (atomic_cas_value_and_7_<mode>): Likewise. (atomic_cas_value_xor_7_<mode>): Likewise. (atomic_cas_value_or_7_<mode>): Likewise. (atomic_cas_value_nand_7_<mode>): Likewise. (atomic_cas_value_exchange_7_<mode>): Likewise. gcc/testsuite/ChangeLog: * gcc.target/loongarch/cas-acquire.c: New test.
- Loading branch information
Showing
3 changed files
with
119 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
/* { dg-do run } */ | ||
/* { dg-require-effective-target c99_runtime } */ | ||
/* { dg-require-effective-target pthread } */ | ||
/* { dg-options "-std=c99 -pthread" } */ | ||
|
||
/* https://github.com/llvm/llvm-project/pull/67391#issuecomment-1752403934 | ||
reported that this had failed with GCC and 3A6000. */ | ||
|
||
#include <pthread.h> | ||
#include <stdatomic.h> | ||
#include <stdbool.h> | ||
#include <stdio.h> | ||
|
||
/* Shared slots used by the two threads: the writer stores a number into
   vals[k] and then the same number into tags[k]; the reader inspects
   both.  They are declared as atomic objects because every access goes
   through <stdatomic.h> operations, and C11 does not guarantee that an
   atomic type has the same size, representation or alignment as its
   non-atomic counterpart.  Static storage duration zero-initializes
   both arrays.  */
static atomic_uint tags[32];
static atomic_uint vals[32];
|
||
static void * | ||
writer_entry (void *data) | ||
{ | ||
atomic_uint *pt = (atomic_uint *)tags; | ||
atomic_uint *pv = (atomic_uint *)vals; | ||
|
||
for (unsigned int n = 1; n < 10000; n++) | ||
{ | ||
atomic_store_explicit (&pv[n & 31], n, memory_order_release); | ||
atomic_store_explicit (&pt[n & 31], n, memory_order_release); | ||
} | ||
|
||
return NULL; | ||
} | ||
|
||
static void * | ||
reader_entry (void *data) | ||
{ | ||
atomic_uint *pt = (atomic_uint *)tags; | ||
atomic_uint *pv = (atomic_uint *)vals; | ||
int i; | ||
|
||
for (;;) | ||
{ | ||
for (i = 0; i < 32; i++) | ||
{ | ||
unsigned int tag = 0; | ||
bool res; | ||
|
||
res = atomic_compare_exchange_weak_explicit ( | ||
&pt[i], &tag, 0, memory_order_acquire, memory_order_acquire); | ||
if (!res) | ||
{ | ||
unsigned int val; | ||
|
||
val = atomic_load_explicit (&pv[i], memory_order_relaxed); | ||
if (val < tag) | ||
__builtin_trap (); | ||
} | ||
} | ||
} | ||
|
||
return NULL; | ||
} | ||
|
||
int | ||
main (int argc, char *argv[]) | ||
{ | ||
pthread_t writer; | ||
pthread_t reader; | ||
int res; | ||
|
||
res = pthread_create (&writer, NULL, writer_entry, NULL); | ||
if (res < 0) | ||
__builtin_trap (); | ||
|
||
res = pthread_create (&reader, NULL, reader_entry, NULL); | ||
if (res < 0) | ||
__builtin_trap (); | ||
|
||
res = pthread_join (writer, NULL); | ||
if (res < 0) | ||
__builtin_trap (); | ||
|
||
return 0; | ||
} |