Skip to content

Commit

Permalink
LoongArch: Remove redundant barrier instructions before LL-SC loops
Browse files Browse the repository at this point in the history
This is isomorphic to the LLVM changes [1-2].

On LoongArch, the LL and SC instructions has memory barrier semantics:

- LL: <memory-barrier> + <load-exclusive>
- SC: <store-conditional> + <memory-barrier>

But the compare and swap operation is allowed to fail, and if it fails
the SC instruction is not executed, thus the guarantee of acquiring
semantics cannot be ensured. Therefore, an acquire barrier needs to be
generated when failure_memorder includes an acquire operation.

On CPUs implementing LoongArch v1.10 or later, "dbar 0b10100" is an
acquire barrier; on CPUs implementing LoongArch v1.00, it is a full
barrier.  So it's always enough for acquire semantics.  OTOH if an
acquire semantic is not needed, we still needs the "dbar 0x700" as the
load-load barrier like all LL-SC loops.

[1]:llvm/llvm-project#67391
[2]:llvm/llvm-project#69339

gcc/ChangeLog:

	* config/loongarch/loongarch.cc
	(loongarch_memmodel_needs_release_fence): Remove.
	(loongarch_cas_failure_memorder_needs_acquire): New static
	function.
	(loongarch_print_operand): Redefine 'G' for the barrier on CAS
	failure.
	* config/loongarch/sync.md (atomic_cas_value_strong<mode>):
	Remove the redundant barrier before the LL instruction, and
	emit an acquire barrier on failure if needed by
	failure_memorder.
	(atomic_cas_value_cmp_and_7_<mode>): Likewise.
	(atomic_cas_value_add_7_<mode>): Remove the unnecessary barrier
	before the LL instruction.
	(atomic_cas_value_sub_7_<mode>): Likewise.
	(atomic_cas_value_and_7_<mode>): Likewise.
	(atomic_cas_value_xor_7_<mode>): Likewise.
	(atomic_cas_value_or_7_<mode>): Likewise.
	(atomic_cas_value_nand_7_<mode>): Likewise.
	(atomic_cas_value_exchange_7_<mode>): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/cas-acquire.c: New test.
  • Loading branch information
xry111 committed Nov 15, 2023
1 parent 452476d commit 4d86dc5
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 42 deletions.
30 changes: 17 additions & 13 deletions gcc/config/loongarch/loongarch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5833,27 +5833,27 @@ loongarch_memmodel_needs_rel_acq_fence (enum memmodel model)
}
}

/* Return true if a FENCE should be emitted to before a memory access to
implement the release portion of memory model MODEL. */
/* Return true if a FENCE should be emitted after a failed CAS to
implement the acquire semantic of failure_memorder. */

static bool
loongarch_memmodel_needs_release_fence (enum memmodel model)
loongarch_cas_failure_memorder_needs_acquire (enum memmodel model)
{
switch (model)
switch (memmodel_base (model))
{
case MEMMODEL_ACQUIRE:
case MEMMODEL_ACQ_REL:
case MEMMODEL_SEQ_CST:
case MEMMODEL_SYNC_SEQ_CST:
case MEMMODEL_RELEASE:
case MEMMODEL_SYNC_RELEASE:
return true;

case MEMMODEL_ACQUIRE:
case MEMMODEL_CONSUME:
case MEMMODEL_SYNC_ACQUIRE:
case MEMMODEL_RELAXED:
case MEMMODEL_RELEASE:
return false;

/* MEMMODEL_CONSUME is deliberately not handled because it's always
replaced by MEMMODEL_ACQUIRE as at now. If you see an ICE caused by
MEMMODEL_CONSUME, read the change (re)introducing it carefully and
decide what to do. See PR 59448 and get_memmodel in builtins.cc. */
default:
gcc_unreachable ();
}
Expand Down Expand Up @@ -5966,7 +5966,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
'd' Print CONST_INT OP in decimal.
'E' Print CONST_INT OP element 0 of a replicated CONST_VECTOR in decimal.
'F' Print the FPU branch condition for comparison OP.
'G' Print a DBAR insn if the memory model requires a release.
'G' Print a DBAR insn for CAS failure (with an acquire semantic if
needed, otherwise a simple load-load barrier).
'H' Print address 52-61bit relocation associated with OP.
'h' Print the high-part relocation associated with OP.
'i' Print i if the operand is not a register.
Expand Down Expand Up @@ -6057,8 +6058,11 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
break;

case 'G':
if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op)))
fputs ("dbar\t0", file);
if (loongarch_cas_failure_memorder_needs_acquire (
memmodel_from_int (INTVAL (op))))
fputs ("dbar\t0b10100", file);
else
fputs ("dbar\t0x700", file);
break;

case 'h':
Expand Down
49 changes: 20 additions & 29 deletions gcc/config/loongarch/sync.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,19 +162,18 @@
(clobber (match_scratch:GPR 6 "=&r"))]
""
{
return "%G5\\n\\t"
"1:\\n\\t"
return "1:\\n\\t"
"ll.<amo>\\t%0,%1\\n\\t"
"bne\\t%0,%z2,2f\\n\\t"
"or%i3\\t%6,$zero,%3\\n\\t"
"sc.<amo>\\t%6,%1\\n\\t"
"beq\\t$zero,%6,1b\\n\\t"
"beqz\\t%6,1b\\n\\t"
"b\\t3f\\n\\t"
"2:\\n\\t"
"dbar\\t0x700\\n\\t"
"%G5\\n\\t"
"3:\\n\\t";
}
[(set (attr "length") (const_int 32))])
[(set (attr "length") (const_int 28))])

(define_expand "atomic_compare_and_swap<mode>"
[(match_operand:SI 0 "register_operand" "") ;; bool output
Expand Down Expand Up @@ -267,8 +266,7 @@
(clobber (match_scratch:GPR 7 "=&r"))]
""
{
return "%G6\\n\\t"
"1:\\n\\t"
return "1:\\n\\t"
"ll.<amo>\\t%0,%1\\n\\t"
"and\\t%7,%0,%2\\n\\t"
"bne\\t%7,%z4,2f\\n\\t"
Expand All @@ -278,10 +276,10 @@
"beq\\t$zero,%7,1b\\n\\t"
"b\\t3f\\n\\t"
"2:\\n\\t"
"dbar\\t0x700\\n\\t"
"%G6\\n\\t"
"3:\\n\\t";
}
[(set (attr "length") (const_int 40))])
[(set (attr "length") (const_int 36))])

(define_expand "atomic_compare_and_swap<mode>"
[(match_operand:SI 0 "register_operand" "") ;; bool output
Expand Down Expand Up @@ -336,8 +334,7 @@
(clobber (match_scratch:GPR 8 "=&r"))]
""
{
return "%G6\\n\\t"
"1:\\n\\t"
return "1:\\n\\t"
"ll.<amo>\\t%0,%1\\n\\t"
"and\\t%7,%0,%3\\n\\t"
"add.w\\t%8,%0,%z5\\n\\t"
Expand All @@ -347,7 +344,7 @@
"beq\\t$zero,%7,1b";
}

[(set (attr "length") (const_int 32))])
[(set (attr "length") (const_int 28))])

(define_insn "atomic_cas_value_sub_7_<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
Expand All @@ -363,8 +360,7 @@
(clobber (match_scratch:GPR 8 "=&r"))]
""
{
return "%G6\\n\\t"
"1:\\n\\t"
return "1:\\n\\t"
"ll.<amo>\\t%0,%1\\n\\t"
"and\\t%7,%0,%3\\n\\t"
"sub.w\\t%8,%0,%z5\\n\\t"
Expand All @@ -373,7 +369,7 @@
"sc.<amo>\\t%7,%1\\n\\t"
"beq\\t$zero,%7,1b";
}
[(set (attr "length") (const_int 32))])
[(set (attr "length") (const_int 28))])

(define_insn "atomic_cas_value_and_7_<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
Expand All @@ -389,8 +385,7 @@
(clobber (match_scratch:GPR 8 "=&r"))]
""
{
return "%G6\\n\\t"
"1:\\n\\t"
return "1:\\n\\t"
"ll.<amo>\\t%0,%1\\n\\t"
"and\\t%7,%0,%3\\n\\t"
"and\\t%8,%0,%z5\\n\\t"
Expand All @@ -399,7 +394,7 @@
"sc.<amo>\\t%7,%1\\n\\t"
"beq\\t$zero,%7,1b";
}
[(set (attr "length") (const_int 32))])
[(set (attr "length") (const_int 28))])

(define_insn "atomic_cas_value_xor_7_<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
Expand All @@ -415,8 +410,7 @@
(clobber (match_scratch:GPR 8 "=&r"))]
""
{
return "%G6\\n\\t"
"1:\\n\\t"
return "1:\\n\\t"
"ll.<amo>\\t%0,%1\\n\\t"
"and\\t%7,%0,%3\\n\\t"
"xor\\t%8,%0,%z5\\n\\t"
Expand All @@ -426,7 +420,7 @@
"beq\\t$zero,%7,1b";
}

[(set (attr "length") (const_int 32))])
[(set (attr "length") (const_int 28))])

(define_insn "atomic_cas_value_or_7_<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
Expand All @@ -442,8 +436,7 @@
(clobber (match_scratch:GPR 8 "=&r"))]
""
{
return "%G6\\n\\t"
"1:\\n\\t"
return "1:\\n\\t"
"ll.<amo>\\t%0,%1\\n\\t"
"and\\t%7,%0,%3\\n\\t"
"or\\t%8,%0,%z5\\n\\t"
Expand All @@ -453,7 +446,7 @@
"beq\\t$zero,%7,1b";
}

[(set (attr "length") (const_int 32))])
[(set (attr "length") (const_int 28))])

(define_insn "atomic_cas_value_nand_7_<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
Expand All @@ -469,8 +462,7 @@
(clobber (match_scratch:GPR 8 "=&r"))]
""
{
return "%G6\\n\\t"
"1:\\n\\t"
return "1:\\n\\t"
"ll.<amo>\\t%0,%1\\n\\t"
"and\\t%7,%0,%3\\n\\t"
"and\\t%8,%0,%z5\\n\\t"
Expand All @@ -479,7 +471,7 @@
"sc.<amo>\\t%7,%1\\n\\t"
"beq\\t$zero,%7,1b";
}
[(set (attr "length") (const_int 32))])
[(set (attr "length") (const_int 28))])

(define_insn "atomic_cas_value_exchange_7_<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r")
Expand All @@ -494,8 +486,7 @@
(clobber (match_scratch:GPR 7 "=&r"))]
""
{
return "%G6\\n\\t"
"1:\\n\\t"
return "1:\\n\\t"
"ll.<amo>\\t%0,%1\\n\\t"
"and\\t%7,%0,%z3\\n\\t"
"or%i5\\t%7,%7,%5\\n\\t"
Expand Down
82 changes: 82 additions & 0 deletions gcc/testsuite/gcc.target/loongarch/cas-acquire.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/* { dg-do run } */
/* { dg-require-effective-target c99_runtime } */
/* { dg-require-effective-target pthread } */
/* { dg-options "-std=c99 -pthread" } */

/* https://github.com/llvm/llvm-project/pull/67391#issuecomment-1752403934
reported that this had failed with GCC and 3A6000. */

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static unsigned int tags[32];
static unsigned int vals[32];

static void *
writer_entry (void *data)
{
atomic_uint *pt = (atomic_uint *)tags;
atomic_uint *pv = (atomic_uint *)vals;

for (unsigned int n = 1; n < 10000; n++)
{
atomic_store_explicit (&pv[n & 31], n, memory_order_release);
atomic_store_explicit (&pt[n & 31], n, memory_order_release);
}

return NULL;
}

static void *
reader_entry (void *data)
{
atomic_uint *pt = (atomic_uint *)tags;
atomic_uint *pv = (atomic_uint *)vals;
int i;

for (;;)
{
for (i = 0; i < 32; i++)
{
unsigned int tag = 0;
bool res;

res = atomic_compare_exchange_weak_explicit (
&pt[i], &tag, 0, memory_order_acquire, memory_order_acquire);
if (!res)
{
unsigned int val;

val = atomic_load_explicit (&pv[i], memory_order_relaxed);
if (val < tag)
__builtin_trap ();
}
}
}

return NULL;
}

int
main (int argc, char *argv[])
{
pthread_t writer;
pthread_t reader;
int res;

res = pthread_create (&writer, NULL, writer_entry, NULL);
if (res < 0)
__builtin_trap ();

res = pthread_create (&reader, NULL, reader_entry, NULL);
if (res < 0)
__builtin_trap ();

res = pthread_join (writer, NULL);
if (res < 0)
__builtin_trap ();

return 0;
}

0 comments on commit 4d86dc5

Please sign in to comment.