Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AArch64][SME] Zero reserved bytes when allocating a new TPIDR2 object #68411

Merged
merged 1 commit into from
Oct 9, 2023

Conversation

kmclaughlin-arm
Copy link
Contributor

SME support routines expect that the reserved bytes of TPIDR2 (bytes 10-15) are zero. This patch ensures that the reserved bytes are cleared when allocating a new TPIDR2 block.

SME support routines expect that the reserved bytes of TPIDR2 (bytes 10-15)
are zero. This patch ensures that the reserved bytes are cleared when
allocating a new TPIDR2 block.
@llvmbot
Copy link
Member

llvmbot commented Oct 6, 2023

@llvm/pr-subscribers-backend-aarch64

Changes

SME support routines expect that the reserved bytes of TPIDR2 (bytes 10-15) are zero. This patch ensures that the reserved bytes are cleared when allocating a new TPIDR2 block.


Full diff: https://github.com/llvm/llvm-project/pull/68411.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+11)
  • (modified) llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll (+12-4)
  • (modified) llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll (+17-10)
  • (modified) llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll (+8-4)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3ae7a893ca4e9e3..4e0f0041ce14e56 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6382,6 +6382,17 @@ AArch64TargetLowering::allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
       DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
   Chain = DAG.getStore(Chain, DL, Buffer, Ptr, MPI);
 
+  // Set the reserved bytes (10-15) to zero
+  EVT PtrTy = Ptr.getValueType();
+  SDValue ReservedPtr =
+      DAG.getNode(ISD::ADD, DL, PtrTy, Ptr, DAG.getConstant(10, DL, PtrTy));
+  Chain = DAG.getStore(Chain, DL, DAG.getConstant(0, DL, MVT::i16), ReservedPtr,
+                       MPI);
+  ReservedPtr =
+      DAG.getNode(ISD::ADD, DL, PtrTy, Ptr, DAG.getConstant(12, DL, PtrTy));
+  Chain = DAG.getStore(Chain, DL, DAG.getConstant(0, DL, MVT::i32), ReservedPtr,
+                       MPI);
+
   return TPIDR2Obj;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
index 98a8769afea8513..b2bb62c42b11acd 100644
--- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
+++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
@@ -220,6 +220,8 @@ define double  @za_new_caller_to_za_shared_callee(double %x) nounwind noinline o
 ; CHECK-COMMON-NEXT:    msub x8, x8, x8, x9
 ; CHECK-COMMON-NEXT:    mov sp, x8
 ; CHECK-COMMON-NEXT:    stur x8, [x29, #-16]
+; CHECK-COMMON-NEXT:    sturh wzr, [x29, #-6]
+; CHECK-COMMON-NEXT:    stur wzr, [x29, #-4]
 ; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
 ; CHECK-COMMON-NEXT:    cbz x8, .LBB6_2
 ; CHECK-COMMON-NEXT:    b .LBB6_1
@@ -256,6 +258,8 @@ define double  @za_shared_caller_to_za_none_callee(double %x) nounwind noinline
 ; CHECK-COMMON-NEXT:    subs x9, x9, x8
 ; CHECK-COMMON-NEXT:    mov sp, x9
 ; CHECK-COMMON-NEXT:    stur x9, [x29, #-16]
+; CHECK-COMMON-NEXT:    sturh wzr, [x29, #-6]
+; CHECK-COMMON-NEXT:    stur wzr, [x29, #-4]
 ; CHECK-COMMON-NEXT:    sturh w8, [x29, #-8]
 ; CHECK-COMMON-NEXT:    sub x8, x29, #16
 ; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x8
@@ -294,10 +298,12 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwi
 ; CHECK-COMMON-NEXT:    mul x8, x8, x8
 ; CHECK-COMMON-NEXT:    sub x9, x9, x8
 ; CHECK-COMMON-NEXT:    mov sp, x9
+; CHECK-COMMON-NEXT:    sub x10, x29, #16
+; CHECK-COMMON-NEXT:    stur wzr, [x29, #-4]
+; CHECK-COMMON-NEXT:    sturh wzr, [x29, #-6]
 ; CHECK-COMMON-NEXT:    stur x9, [x29, #-16]
-; CHECK-COMMON-NEXT:    sub x9, x29, #16
 ; CHECK-COMMON-NEXT:    sturh w8, [x29, #-8]
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x9
+; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x10
 ; CHECK-COMMON-NEXT:    bl __addtf3
 ; CHECK-COMMON-NEXT:    smstart za
 ; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
@@ -355,10 +361,12 @@ define double @frem_call_za(double %a, double %b) "aarch64_pstate_za_shared" nou
 ; CHECK-COMMON-NEXT:    mul x8, x8, x8
 ; CHECK-COMMON-NEXT:    sub x9, x9, x8
 ; CHECK-COMMON-NEXT:    mov sp, x9
+; CHECK-COMMON-NEXT:    sub x10, x29, #16
+; CHECK-COMMON-NEXT:    stur wzr, [x29, #-4]
+; CHECK-COMMON-NEXT:    sturh wzr, [x29, #-6]
 ; CHECK-COMMON-NEXT:    stur x9, [x29, #-16]
-; CHECK-COMMON-NEXT:    sub x9, x29, #16
 ; CHECK-COMMON-NEXT:    sturh w8, [x29, #-8]
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x9
+; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x10
 ; CHECK-COMMON-NEXT:    bl fmod
 ; CHECK-COMMON-NEXT:    smstart za
 ; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
index ad16402a18f8b92..3490817b8a49e68 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
@@ -17,10 +17,12 @@ define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" {
 ; CHECK-NEXT:    mul x8, x8, x8
 ; CHECK-NEXT:    sub x9, x9, x8
 ; CHECK-NEXT:    mov sp, x9
+; CHECK-NEXT:    sub x10, x29, #16
+; CHECK-NEXT:    stur wzr, [x29, #-4]
+; CHECK-NEXT:    sturh wzr, [x29, #-6]
 ; CHECK-NEXT:    stur x9, [x29, #-16]
-; CHECK-NEXT:    sub x9, x29, #16
 ; CHECK-NEXT:    sturh w8, [x29, #-8]
-; CHECK-NEXT:    msr TPIDR2_EL0, x9
+; CHECK-NEXT:    msr TPIDR2_EL0, x10
 ; CHECK-NEXT:    bl private_za_callee
 ; CHECK-NEXT:    smstart za
 ; CHECK-NEXT:    mrs x8, TPIDR2_EL0
@@ -51,6 +53,8 @@ define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" {
 ; CHECK-NEXT:    sub x8, x8, x19
 ; CHECK-NEXT:    mov sp, x8
 ; CHECK-NEXT:    sub x20, x29, #16
+; CHECK-NEXT:    stur wzr, [x29, #-4]
+; CHECK-NEXT:    sturh wzr, [x29, #-6]
 ; CHECK-NEXT:    stur x8, [x29, #-16]
 ; CHECK-NEXT:    sturh w19, [x29, #-8]
 ; CHECK-NEXT:    msr TPIDR2_EL0, x20
@@ -95,10 +99,12 @@ define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_psta
 ; CHECK-NEXT:    mul x8, x8, x8
 ; CHECK-NEXT:    sub x9, x9, x8
 ; CHECK-NEXT:    mov sp, x9
+; CHECK-NEXT:    sub x10, x29, #16
+; CHECK-NEXT:    stur wzr, [x29, #-4]
+; CHECK-NEXT:    sturh wzr, [x29, #-6]
 ; CHECK-NEXT:    stur x9, [x29, #-16]
-; CHECK-NEXT:    sub x9, x29, #16
 ; CHECK-NEXT:    sturh w8, [x29, #-8]
-; CHECK-NEXT:    msr TPIDR2_EL0, x9
+; CHECK-NEXT:    msr TPIDR2_EL0, x10
 ; CHECK-NEXT:    bl cosf
 ; CHECK-NEXT:    smstart za
 ; CHECK-NEXT:    mrs x8, TPIDR2_EL0
@@ -132,10 +138,12 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_pstate_z
 ; CHECK-NEXT:    mul x8, x8, x8
 ; CHECK-NEXT:    sub x9, x9, x8
 ; CHECK-NEXT:    mov sp, x9
+; CHECK-NEXT:    sub x10, x29, #80
+; CHECK-NEXT:    stur wzr, [x29, #-68]
+; CHECK-NEXT:    sturh wzr, [x29, #-70]
 ; CHECK-NEXT:    stur x9, [x29, #-80]
-; CHECK-NEXT:    sub x9, x29, #80
 ; CHECK-NEXT:    sturh w8, [x29, #-72]
-; CHECK-NEXT:    msr TPIDR2_EL0, x9
+; CHECK-NEXT:    msr TPIDR2_EL0, x10
 ; CHECK-NEXT:    bl __arm_sme_state
 ; CHECK-NEXT:    and x19, x0, #0x1
 ; CHECK-NEXT:    tbz x19, #0, .LBB3_2
@@ -185,10 +193,9 @@ define void @za_shared_caller_za_preserved_callee() nounwind "aarch64_pstate_za_
 ; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    msub x8, x8, x8, x9
 ; CHECK-NEXT:    mov sp, x8
-; CHECK-NEXT:    stur x8, [x29, #-80]
-; CHECK-NEXT:    sub x8, x29, #80
-; CHECK-NEXT:    sturh wzr, [x29, #-72]
-; CHECK-NEXT:    msr TPIDR2_EL0, x8
+; CHECK-NEXT:    sub x9, x29, #80
+; CHECK-NEXT:    stp x8, xzr, [x29, #-80]
+; CHECK-NEXT:    msr TPIDR2_EL0, x9
 ; CHECK-NEXT:    bl __arm_sme_state
 ; CHECK-NEXT:    and x19, x0, #0x1
 ; CHECK-NEXT:    tbz x19, #0, .LBB4_2
diff --git a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
index de7df1c9831908f..ddb3dc6324caea4 100644
--- a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
@@ -15,10 +15,12 @@ define void @disable_tailcallopt() "aarch64_pstate_za_shared" nounwind {
 ; CHECK-NEXT:    mul x8, x8, x8
 ; CHECK-NEXT:    sub x9, x9, x8
 ; CHECK-NEXT:    mov sp, x9
+; CHECK-NEXT:    sub x10, x29, #16
+; CHECK-NEXT:    stur wzr, [x29, #-4]
+; CHECK-NEXT:    sturh wzr, [x29, #-6]
 ; CHECK-NEXT:    stur x9, [x29, #-16]
-; CHECK-NEXT:    sub x9, x29, #16
 ; CHECK-NEXT:    sturh w8, [x29, #-8]
-; CHECK-NEXT:    msr TPIDR2_EL0, x9
+; CHECK-NEXT:    msr TPIDR2_EL0, x10
 ; CHECK-NEXT:    bl private_za_callee
 ; CHECK-NEXT:    smstart za
 ; CHECK-NEXT:    mrs x8, TPIDR2_EL0
@@ -47,10 +49,12 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwi
 ; CHECK-NEXT:    mul x8, x8, x8
 ; CHECK-NEXT:    sub x9, x9, x8
 ; CHECK-NEXT:    mov sp, x9
+; CHECK-NEXT:    sub x10, x29, #16
+; CHECK-NEXT:    stur wzr, [x29, #-4]
+; CHECK-NEXT:    sturh wzr, [x29, #-6]
 ; CHECK-NEXT:    stur x9, [x29, #-16]
-; CHECK-NEXT:    sub x9, x29, #16
 ; CHECK-NEXT:    sturh w8, [x29, #-8]
-; CHECK-NEXT:    msr TPIDR2_EL0, x9
+; CHECK-NEXT:    msr TPIDR2_EL0, x10
 ; CHECK-NEXT:    bl __addtf3
 ; CHECK-NEXT:    smstart za
 ; CHECK-NEXT:    mrs x8, TPIDR2_EL0

Copy link
Collaborator

@sdesmalen-arm sdesmalen-arm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for fixing, this zeroing was still missing and led to some runtime failures when I tried to test the compiler-rt routines https://reviews.llvm.org/D154045.

LGTM

Copy link
Contributor

@jroelofs jroelofs left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants