-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Add tests for atomicrmw handling of new metadata #89248
AMDGPU: Add tests for atomicrmw handling of new metadata #89248
Conversation
Add baseline tests which should comprehensively test the new atomic metadata. Test codegen / expansion, and preservation in a few transforms. New metadata defined in llvm#85052
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes: Add baseline tests which should comprehensively test the new atomic metadata. Test codegen / expansion, and preservation in a few transforms. New metadata defined in #85052. Patch is 1.49 MiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/89248.diff 16 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll
index 94956511c39dfb..961273468e75ff 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll
@@ -287,6 +287,72 @@ define amdgpu_gfx i32 @flat_atomic_xchg_i32_ret_offset_scalar(ptr inreg %out, i3
ret i32 %result
}
+define void @flat_atomic_xchg_i32_noret_offset__amdgpu_no_remote_memory_access(ptr %out, i32 %in) {
+; GCN1-LABEL: flat_atomic_xchg_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN1: ; %bb.0:
+; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT: flat_atomic_swap v[0:1], v2
+; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT: buffer_wbinvl1_vol
+; GCN1-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_xchg_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN2: ; %bb.0:
+; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT: flat_atomic_swap v[0:1], v2
+; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT: buffer_wbinvl1_vol
+; GCN2-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_xchg_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN3: ; %bb.0:
+; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT: flat_atomic_swap v[0:1], v2 offset:16
+; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT: buffer_wbinvl1_vol
+; GCN3-NEXT: s_setpc_b64 s[30:31]
+ %gep = getelementptr i32, ptr %out, i64 4
+ %tmp0 = atomicrmw xchg ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory.access !0
+ ret void
+}
+
+define i32 @flat_atomic_xchg_i32_ret_offset__amdgpu_no_remote_memory_access(ptr %out, i32 %in) {
+; GCN1-LABEL: flat_atomic_xchg_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN1: ; %bb.0:
+; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT: flat_atomic_swap v0, v[0:1], v2 glc
+; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT: buffer_wbinvl1_vol
+; GCN1-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_xchg_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN2: ; %bb.0:
+; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT: flat_atomic_swap v0, v[0:1], v2 glc
+; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT: buffer_wbinvl1_vol
+; GCN2-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_xchg_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN3: ; %bb.0:
+; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:16 glc
+; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT: buffer_wbinvl1_vol
+; GCN3-NEXT: s_setpc_b64 s[30:31]
+ %gep = getelementptr i32, ptr %out, i64 4
+ %result = atomicrmw xchg ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory.access !0
+ ret i32 %result
+}
+
; ---------------------------------------------------------------------
; atomicrmw xchg f32
; ---------------------------------------------------------------------
@@ -571,6 +637,72 @@ define amdgpu_gfx float @flat_atomic_xchg_f32_ret_offset_scalar(ptr inreg %out,
ret float %result
}
+define void @flat_atomic_xchg_f32_noret_offset__amdgpu_no_remote_memory_access(ptr %out, float %in) {
+; GCN1-LABEL: flat_atomic_xchg_f32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN1: ; %bb.0:
+; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT: flat_atomic_swap v[0:1], v2
+; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT: buffer_wbinvl1_vol
+; GCN1-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_xchg_f32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN2: ; %bb.0:
+; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT: flat_atomic_swap v[0:1], v2
+; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT: buffer_wbinvl1_vol
+; GCN2-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_xchg_f32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN3: ; %bb.0:
+; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT: flat_atomic_swap v[0:1], v2 offset:16
+; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT: buffer_wbinvl1_vol
+; GCN3-NEXT: s_setpc_b64 s[30:31]
+ %gep = getelementptr float, ptr %out, i64 4
+ %tmp0 = atomicrmw xchg ptr %gep, float %in seq_cst, !amdgpu.no.remote.memory.access !0
+ ret void
+}
+
+define float @flat_atomic_xchg_f32_ret_offset__amdgpu_no_remote_memory_access(ptr %out, float %in) {
+; GCN1-LABEL: flat_atomic_xchg_f32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN1: ; %bb.0:
+; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT: flat_atomic_swap v0, v[0:1], v2 glc
+; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT: buffer_wbinvl1_vol
+; GCN1-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_xchg_f32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN2: ; %bb.0:
+; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT: flat_atomic_swap v0, v[0:1], v2 glc
+; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT: buffer_wbinvl1_vol
+; GCN2-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_xchg_f32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN3: ; %bb.0:
+; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:16 glc
+; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT: buffer_wbinvl1_vol
+; GCN3-NEXT: s_setpc_b64 s[30:31]
+ %gep = getelementptr float, ptr %out, i64 4
+ %result = atomicrmw xchg ptr %gep, float %in seq_cst, !amdgpu.no.remote.memory.access !0
+ ret float %result
+}
+
; ---------------------------------------------------------------------
; atomicrmw add
; ---------------------------------------------------------------------
@@ -855,6 +987,72 @@ define amdgpu_gfx i32 @flat_atomic_add_i32_ret_offset_scalar(ptr inreg %out, i32
ret i32 %result
}
+define void @flat_atomic_add_i32_noret_offset__amdgpu_no_remote_memory_access(ptr %out, i32 %in) {
+; GCN1-LABEL: flat_atomic_add_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN1: ; %bb.0:
+; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT: flat_atomic_add v[0:1], v2
+; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT: buffer_wbinvl1_vol
+; GCN1-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_add_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN2: ; %bb.0:
+; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT: flat_atomic_add v[0:1], v2
+; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT: buffer_wbinvl1_vol
+; GCN2-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_add_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN3: ; %bb.0:
+; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT: flat_atomic_add v[0:1], v2 offset:16
+; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT: buffer_wbinvl1_vol
+; GCN3-NEXT: s_setpc_b64 s[30:31]
+ %gep = getelementptr i32, ptr %out, i64 4
+ %tmp0 = atomicrmw add ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory.access !0
+ ret void
+}
+
+define i32 @flat_atomic_add_i32_ret_offset__amdgpu_no_remote_memory_access(ptr %out, i32 %in) {
+; GCN1-LABEL: flat_atomic_add_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN1: ; %bb.0:
+; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT: flat_atomic_add v0, v[0:1], v2 glc
+; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT: buffer_wbinvl1_vol
+; GCN1-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_add_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN2: ; %bb.0:
+; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT: flat_atomic_add v0, v[0:1], v2 glc
+; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT: buffer_wbinvl1_vol
+; GCN2-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_add_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN3: ; %bb.0:
+; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT: flat_atomic_add v0, v[0:1], v2 offset:16 glc
+; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT: buffer_wbinvl1_vol
+; GCN3-NEXT: s_setpc_b64 s[30:31]
+ %gep = getelementptr i32, ptr %out, i64 4
+ %result = atomicrmw add ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory.access !0
+ ret i32 %result
+}
+
; ---------------------------------------------------------------------
; atomicrmw sub
; ---------------------------------------------------------------------
@@ -1139,6 +1337,72 @@ define amdgpu_gfx i32 @flat_atomic_sub_i32_ret_offset_scalar(ptr inreg %out, i32
ret i32 %result
}
+define void @flat_atomic_sub_i32_noret_offset__amdgpu_no_remote_memory_access(ptr %out, i32 %in) {
+; GCN1-LABEL: flat_atomic_sub_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN1: ; %bb.0:
+; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT: flat_atomic_sub v[0:1], v2
+; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT: buffer_wbinvl1_vol
+; GCN1-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_sub_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN2: ; %bb.0:
+; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT: flat_atomic_sub v[0:1], v2
+; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT: buffer_wbinvl1_vol
+; GCN2-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_sub_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN3: ; %bb.0:
+; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT: flat_atomic_sub v[0:1], v2 offset:16
+; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT: buffer_wbinvl1_vol
+; GCN3-NEXT: s_setpc_b64 s[30:31]
+ %gep = getelementptr i32, ptr %out, i64 4
+ %tmp0 = atomicrmw sub ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory.access !0
+ ret void
+}
+
+define i32 @flat_atomic_sub_i32_ret_offset__amdgpu_no_remote_memory_access(ptr %out, i32 %in) {
+; GCN1-LABEL: flat_atomic_sub_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN1: ; %bb.0:
+; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT: flat_atomic_sub v0, v[0:1], v2 glc
+; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT: buffer_wbinvl1_vol
+; GCN1-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_sub_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN2: ; %bb.0:
+; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT: flat_atomic_sub v0, v[0:1], v2 glc
+; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT: buffer_wbinvl1_vol
+; GCN2-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_sub_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN3: ; %bb.0:
+; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT: flat_atomic_sub v0, v[0:1], v2 offset:16 glc
+; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT: buffer_wbinvl1_vol
+; GCN3-NEXT: s_setpc_b64 s[30:31]
+ %gep = getelementptr i32, ptr %out, i64 4
+ %result = atomicrmw sub ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory.access !0
+ ret i32 %result
+}
+
; ---------------------------------------------------------------------
; atomicrmw and
; ---------------------------------------------------------------------
@@ -1423,61 +1687,127 @@ define amdgpu_gfx i32 @flat_atomic_and_i32_ret_offset_scalar(ptr inreg %out, i32
ret i32 %result
}
-; ---------------------------------------------------------------------
-; atomicrmw nand
-; ---------------------------------------------------------------------
-
-define void @flat_atomic_nand_i32_noret(ptr %ptr, i32 %in) {
-; GCN1-LABEL: flat_atomic_nand_i32_noret:
+define void @flat_atomic_and_i32_noret_offset__amdgpu_no_remote_memory_access(ptr %out, i32 %in) {
+; GCN1-LABEL: flat_atomic_and_i32_noret_offset__amdgpu_no_remote_memory_access:
; GCN1: ; %bb.0:
; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT: flat_load_dword v4, v[0:1]
-; GCN1-NEXT: s_mov_b64 s[4:5], 0
-; GCN1-NEXT: .LBB40_1: ; %atomicrmw.start
-; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT: v_and_b32_e32 v3, v4, v2
-; GCN1-NEXT: v_not_b32_e32 v3, v3
-; GCN1-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc
+; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT: flat_atomic_and v[0:1], v2
; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN1-NEXT: buffer_wbinvl1_vol
-; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4
-; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN1-NEXT: v_mov_b32_e32 v4, v3
-; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN1-NEXT: s_cbranch_execnz .LBB40_1
-; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN1-NEXT: s_setpc_b64 s[30:31]
;
-; GCN2-LABEL: flat_atomic_nand_i32_noret:
+; GCN2-LABEL: flat_atomic_and_i32_noret_offset__amdgpu_no_remote_memory_access:
; GCN2: ; %bb.0:
; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT: flat_load_dword v4, v[0:1]
-; GCN2-NEXT: s_mov_b64 s[4:5], 0
-; GCN2-NEXT: .LBB40_1: ; %atomicrmw.start
-; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT: v_and_b32_e32 v3, v4, v2
-; GCN2-NEXT: v_not_b32_e32 v3, v3
-; GCN2-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc
+; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT: flat_atomic_and v[0:1], v2
; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN2-NEXT: buffer_wbinvl1_vol
-; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4
-; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN2-NEXT: v_mov_b32_e32 v4, v3
-; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GCN2-NEXT: s_cbranch_execnz .LBB40_1
-; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN2-NEXT: s_setpc_b64 s[30:31]
;
-; GCN3-LABEL: flat_atomic_nand_i32_noret:
+; GCN3-LABEL: flat_atomic_and_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN3: ; %bb.0:
+; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT: flat_atomic_and v[0:1], v2 offset:16
+; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT: buffer_wbinvl1_vol
+; GCN3-NEXT: s_setpc_b64 s[30:31]
+ %gep = getelementptr i32, ptr %out, i64 4
+ %tmp0 = atomicrmw and ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory.access !0
+ ret void
+}
+
+define i32 @flat_atomic_and_i32_ret_offset__amdgpu_no_remote_memory_access(ptr %out, i32 %in) {
+; GCN1-LABEL: flat_atomic_and_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN1: ; %bb.0:
+; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT: flat_atomic_and v0, v[0:1], v2 glc
+; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT: buffer_wbinvl1_vol
+; GCN1-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_and_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN2: ; %bb.0:
+; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT: flat_atomic_and v0, v[0:1], v2 glc
+; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT: buffer_wbinvl1_vol
+; GCN2-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_and_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN3: ; %bb.0:
+; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT: flat_atomic_and v0, v[0:1], v2 offset:16 glc
+; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT: buffer_wbinvl1_vol
+; GCN3-NEXT: s_setpc_b64 s[30:31]
+ %gep = getelementptr i32, ptr %out, i64 4
+ %result = atomicrmw and ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory.access !0
+ ret i32 %result
+}
+
+; ---------------------------------------------------------------------
+; atomicrmw nand
+; ---------------------------------------------------------------------
+
+define void @flat_atomic_nand_i32_noret(ptr %ptr, i32 %in) {
+; GCN1-LABEL: flat_atomic_nand_i32_noret:
+; GCN1: ; %bb.0:
+; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT: flat_load_dword v4, v[0:1]
+; GCN1-NEXT: s_mov_b64 s[4:5], 0
+; GCN1-NEXT: .LBB50_1: ; %atomicrmw.start
+; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1
+; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT: v_and_b32_e32 v3, v4, v2
+; GCN1-NEXT: v_not_b32_e32 v3, v3
+; GCN1-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc
+; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT: buffer_wbinvl1_vol
+; GCN1-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4
+; GCN1-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GCN1-NEXT: v_mov_b32_e32 v4, v3
+; GCN1-NEXT: s_andn2_b64 exec, exec, s[4:5]
+; GCN1-NEXT: s_cbranch_execnz .LBB50_1
+; GCN1-NEXT: ; %bb.2: ; %atomicrmw.end
+; GCN1-NEXT: s_or_b64 exec, exec, s[4:5]
+; GCN1-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_nand_i32_noret:
+; GCN2: ; %bb.0:
+; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT: flat_load_dword v4, v[0:1]
+; GCN2-NEXT: s_mov_b64 s[4:5], 0
+; GCN2-NEXT: .LBB50_1: ; %atomicrmw.start
+; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1
+; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT: v_and_b32_e32 v3, v4, v2
+; GCN2-NEXT: v_not_b32_e32 v3, v3
+; GCN2-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc
+; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT: buffer_wbinvl1_vol
+; GCN2-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4
+; GCN2-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GCN2-NEXT: v_mov_b32_e32 v4, v3
+; GCN2-NEXT: s_andn2_b64 exec, exec, s[4:5]
+; GCN2-NEXT: s_cbranch_execnz .LBB50_1
+; GCN2-NEXT: ; %bb.2: ; %atomicrmw.end
+; GCN2-NEXT: s_or_b64 exec, exec, s[4:5]
+; GCN2-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_nand_i32_noret:
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: flat_load_dword v4, v[0:1]
; GCN3-NEXT: s_mov_b64 s[4:5], 0
-; GCN3-NEXT: .LBB40_1: ; %atomicrmw.start
+; GCN3-NEXT: .LBB50_1: ; %atomicrmw.start
; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_and_b32_e32 v3, v4, v2
@@ -1489,7 +1819,7 @@ define void @flat_atomic_nand_i32_noret(ptr %ptr, i32 %in) {
; GCN3-NEXT...
[truncated]
|
677250e
to
ee3a6cf
Compare
Add baseline tests which should comprehensively test the new atomic metadata. Test codegen / expansion, and preservation in a few transforms. New metadata defined in llvm#85052
Add baseline tests which should comprehensively test the new atomic metadata. Test codegen / expansion, and preservation in a few transforms.
New metadata defined in #85052