-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[OpenCL][RISCV] Support SPIR_KERNEL calling convention #69282
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Wang Pengcheng (wangpc-pp) Changes: X86 supports this calling convention but I don't find any special handling, so I think we can just handle it via CC_RISCV. This should fix #69197. Full diff: https://github.com/llvm/llvm-project/pull/69282.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ed1f7b6c50a4d12..16bd2564867ba0e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
@@ -16997,6 +16998,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
report_fatal_error("Unsupported calling convention");
case CallingConv::C:
case CallingConv::Fast:
+ case CallingConv::SPIR_KERNEL:
break;
case CallingConv::GHC:
if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
diff --git a/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll b/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll
new file mode 100644
index 000000000000000..24f5c54021e3ae0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+f,+d < %s | FileCheck %s -check-prefix=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+f,+d < %s | FileCheck %s -check-prefix=RV64
+
+; Check that the SPIR_KERNEL calling convention works
+
+declare dso_local i64 @_Z13get_global_idj(i32 noundef signext)
+
+define dso_local spir_kernel void @foo(ptr nocapture noundef readonly align 4 %a, ptr nocapture noundef readonly align 4 %b, ptr nocapture noundef writeonly align 4 %c) {
+; RV32-LABEL: foo:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: .cfi_offset s1, -12
+; RV32-NEXT: .cfi_offset s2, -16
+; RV32-NEXT: mv s0, a2
+; RV32-NEXT: mv s1, a1
+; RV32-NEXT: mv s2, a0
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: call _Z13get_global_idj
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add s2, s2, a0
+; RV32-NEXT: flw fa5, 0(s2)
+; RV32-NEXT: add s1, s1, a0
+; RV32-NEXT: flw fa4, 0(s1)
+; RV32-NEXT: fadd.s fa5, fa5, fa4
+; RV32-NEXT: add a0, s0, a0
+; RV32-NEXT: fsw fa5, 0(a0)
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: foo:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -32
+; RV64-NEXT: .cfi_def_cfa_offset 32
+; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: .cfi_offset s1, -24
+; RV64-NEXT: .cfi_offset s2, -32
+; RV64-NEXT: mv s0, a2
+; RV64-NEXT: mv s1, a1
+; RV64-NEXT: mv s2, a0
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: call _Z13get_global_idj
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add s2, s2, a0
+; RV64-NEXT: flw fa5, 0(s2)
+; RV64-NEXT: add s1, s1, a0
+; RV64-NEXT: flw fa4, 0(s1)
+; RV64-NEXT: fadd.s fa5, fa5, fa4
+; RV64-NEXT: add a0, s0, a0
+; RV64-NEXT: fsw fa5, 0(a0)
+; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: ret
+entry:
+ %call = tail call i64 @_Z13get_global_idj(i32 noundef signext 0)
+ %sext = shl i64 %call, 32
+ %idxprom = ashr exact i64 %sext, 32
+ %arrayidx = getelementptr inbounds float, ptr %a, i64 %idxprom
+ %0 = load float, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %idxprom
+ %1 = load float, ptr %arrayidx2, align 4
+ %add = fadd float %0, %1
+ %arrayidx4 = getelementptr inbounds float, ptr %c, i64 %idxprom
+ store float %add, ptr %arrayidx4, align 4
+ ret void
+}
\ No newline at end of file
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Could you simplify the test case?
X86 supports this calling convention but I don't find any special handling, so I think we can just handle it via CC_RISCV. This should fix llvm#69197.
Remove unnecessary include
e1178cc
to
ce143a0
Compare
X86 supports this calling convention but I don't find any special
handling, so I think we can just handle it via CC_RISCV.
This should fix #69197.