-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Add test for readfirstlane with i1 type #109657
Conversation
Add test for readfirstlane with i1 type to demonstrate the lowering works. Also simplify existing tests a bit - the declarations are not strictly needed anymore.
@llvm/pr-subscribers-backend-amdgpu Author: Piotr Sobczak (piotrAMD) Changes: Add test for readfirstlane with i1 type to demonstrate the lowering works. Also simplify existing tests a bit - the declarations are not strictly needed anymore. Full diff: https://github.com/llvm/llvm-project/pull/109657.diff 1 File Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
index b061d53de5d3c5..9cb3ffeb26b41e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
@@ -2,11 +2,65 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-SDAG -enable-var-scope %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=CHECK-GISEL -enable-var-scope %s
-declare i32 @llvm.amdgcn.readfirstlane(i32) #0
-declare i64 @llvm.amdgcn.readfirstlane.i64(i64) #0
-declare double @llvm.amdgcn.readfirstlane.f64(double) #0
+define void @test_readfirstlane_i1(ptr addrspace(1) %out, i1 %src) {
+; CHECK-SDAG-LABEL: test_readfirstlane_i1:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
+; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 1
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4
+; CHECK-SDAG-NEXT: flat_store_byte v[0:1], v2
+; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
+; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; CHECK-GISEL-LABEL: test_readfirstlane_i1:
+; CHECK-GISEL: ; %bb.0:
+; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
+; CHECK-GISEL-NEXT: s_and_b32 s4, s4, 1
+; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
+; CHECK-GISEL-NEXT: flat_store_byte v[0:1], v2
+; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
+; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %src)
+ store i1 %readfirstlane, ptr addrspace(1) %out, align 4
+ ret void
+}
-define void @test_readfirstlane_i32(ptr addrspace(1) %out, i32 %src) #1 {
+define void @test_readfirstlane_i1_select(ptr addrspace(1) %out, i32 %src, i32 %src1) {
+; CHECK-SDAG-LABEL: test_readfirstlane_i1_select:
+; CHECK-SDAG: ; %bb.0:
+; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-SDAG-NEXT: v_cmp_lt_u32_e32 vcc, 42, v2
+; CHECK-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v4
+; CHECK-SDAG-NEXT: s_bitcmp1_b32 s4, 0
+; CHECK-SDAG-NEXT: s_cselect_b64 vcc, -1, 0
+; CHECK-SDAG-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
+; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
+; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; CHECK-GISEL-LABEL: test_readfirstlane_i1_select:
+; CHECK-GISEL: ; %bb.0:
+; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-GISEL-NEXT: v_cmp_lt_u32_e32 vcc, 42, v2
+; CHECK-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v4
+; CHECK-GISEL-NEXT: s_and_b32 s4, 1, s4
+; CHECK-GISEL-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
+; CHECK-GISEL-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
+; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
+; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp ugt i32 %src, 42
+ %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %cmp)
+ %sel = select i1 %readfirstlane, i32 %src, i32 %src1
+ store i32 %sel, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define void @test_readfirstlane_i32(ptr addrspace(1) %out, i32 %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_i32:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -29,7 +83,7 @@ define void @test_readfirstlane_i32(ptr addrspace(1) %out, i32 %src) #1 {
ret void
}
-define void @test_readfirstlane_i64(ptr addrspace(1) %out, i64 %src) #1 {
+define void @test_readfirstlane_i64(ptr addrspace(1) %out, i64 %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_i64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -56,7 +110,7 @@ define void @test_readfirstlane_i64(ptr addrspace(1) %out, i64 %src) #1 {
ret void
}
-define void @test_readfirstlane_f64(ptr addrspace(1) %out, double %src) #1 {
+define void @test_readfirstlane_f64(ptr addrspace(1) %out, double %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_f64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -83,7 +137,7 @@ define void @test_readfirstlane_f64(ptr addrspace(1) %out, double %src) #1 {
ret void
}
-define amdgpu_kernel void @test_readfirstlane_imm_i32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_imm_i32(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_i32:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_mov_b32 s0, 32
@@ -104,7 +158,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_i32(ptr addrspace(1) %out) #1
ret void
}
-define amdgpu_kernel void @test_readfirstlane_imm_i64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_imm_i64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_i64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_mov_b64 s[0:1], 32
@@ -125,7 +179,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_i64(ptr addrspace(1) %out) #1
ret void
}
-define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_f64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_mov_b32 s0, 0
@@ -148,7 +202,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) #1
ret void
}
-define amdgpu_kernel void @test_readfirstlane_imm_fold_i32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_imm_fold_i32(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i32:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -173,7 +227,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_fold_i32(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -201,7 +255,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_f64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -230,7 +284,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_m0:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -262,7 +316,7 @@ define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) #1 {
ret void
}
-define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i32(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i32:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -294,7 +348,7 @@ define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i32(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -328,7 +382,7 @@ define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_f64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -362,7 +416,7 @@ define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @test_readfirstlane_fi(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_fi(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_fi:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_add_u32 s0, s0, s15
@@ -593,6 +647,3 @@ define void @test_readfirstlane_v8i16(ptr addrspace(1) %out, <8 x i16> %src) {
call void asm sideeffect "; use $0", "s"(<8 x i16> %x)
ret void
}
-
-attributes #0 = { nounwind readnone convergent }
-attributes #1 = { nounwind }
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Feels like this maybe shouldn't work, but I guess it works already
Add test for readfirstlane with i1 type to demonstrate the lowering works.
Also simplify existing tests a bit - the declarations are not strictly needed anymore.