Skip to content

Commit

Permalink
[CGP][AArch64] Rebase the common base offset for better ISel
Browse files Browse the repository at this point in the history
When all the large constant offsets share the same value in bits 12 through 23, rebase them onto a common base offset.
Fold
  add     x8, x0, #2031, lsl #12
  add     x8, x8, #960
  ldr     x9, [x8]
  ldr     x8, [x8, #2056]

into
  add     x8, x0, #2031, lsl #12
  ldr     x9, [x8, #960]
  ldr     x8, [x8, #3016]
  • Loading branch information
vfdff committed Dec 5, 2023
1 parent 6a8a562 commit d6f4d52
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 65 deletions.
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
}

/// Forward to the target lowering hook of the same name, passing through the
/// smallest (\p MinOffset) and largest (\p MaxOffset) offsets unchanged and
/// returning whatever the target lowering returns.
int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) {
  const auto *Lowering = getTLI();
  return Lowering->getPreferredLargeGEPBaseOffset(MinOffset, MaxOffset);
}

unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
Type *ScalarValTy) const {
auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) {
Expand Down
8 changes: 7 additions & 1 deletion llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
Expand Down Expand Up @@ -2721,6 +2721,12 @@ class TargetLoweringBase {
Type *Ty, unsigned AddrSpace,
Instruction *I = nullptr) const;

/// Return the preferred common base offset for a group of large constant
/// GEP offsets whose smallest value is \p MinOffset and largest value is
/// \p MaxOffset.
///
/// A return value of 0 means the target has no preferred base; that is what
/// this default implementation always returns. Targets override this hook to
/// suggest a base that is cheap to form.
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
int64_t MaxOffset) const {
return 0;
}

/// Return true if the specified immediate is legal icmp immediate, that is
/// the target has icmp instructions which can compare a register against the
/// immediate without having to materialize the immediate into a register.
Expand Down
79 changes: 50 additions & 29 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6121,6 +6121,55 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
Value *NewBaseGEP = nullptr;

auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
GetElementPtrInst *GEP) {
LLVMContext &Ctx = GEP->getContext();
Type *PtrIdxTy = DL->getIndexType(GEP->getType());
Type *I8PtrTy =
PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
Type *I8Ty = Type::getInt8Ty(Ctx);

BasicBlock::iterator NewBaseInsertPt;
BasicBlock *NewBaseInsertBB;
if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
// If the base of the struct is an instruction, the new base will be
// inserted close to it.
NewBaseInsertBB = BaseI->getParent();
if (isa<PHINode>(BaseI))
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
NewBaseInsertBB =
SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
} else
NewBaseInsertPt = std::next(BaseI->getIterator());
} else {
// If the current base is an argument or global value, the new base
// will be inserted to the entry block.
NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
}
IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
// Create a new base.
Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
NewBaseGEP = OldBase;
if (NewBaseGEP->getType() != I8PtrTy)
NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
NewBaseGEP =
NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
NewGEPBases.insert(NewBaseGEP);
return;
};

// Check whether all the offsets can be encoded with a preferred common base.
// The first and last entries' offsets are passed as the minimum and maximum
// (assumes LargeOffsetGEPs is ordered by offset at this point -- TODO confirm
// against the earlier part of this function).
if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
BaseOffset = PreferBase;
// The target returned a non-zero preferred base, so create the new base
// right away, using BaseGEP for type information, instead of waiting for
// the loop below to create one.
createNewBase(BaseOffset, OldBase, BaseGEP);
}

auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
GetElementPtrInst *GEP = LargeOffsetGEP->first;
Expand Down Expand Up @@ -6153,35 +6202,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
if (!NewBaseGEP) {
// Create a new base if we don't have one yet. Find the insertion
// pointer for the new base first.
BasicBlock::iterator NewBaseInsertPt;
BasicBlock *NewBaseInsertBB;
if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
// If the base of the struct is an instruction, the new base will be
// inserted close to it.
NewBaseInsertBB = BaseI->getParent();
if (isa<PHINode>(BaseI))
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
NewBaseInsertBB =
SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
} else
NewBaseInsertPt = std::next(BaseI->getIterator());
} else {
// If the current base is an argument or global value, the new base
// will be inserted to the entry block.
NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
}
IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
// Create a new base.
Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
NewBaseGEP = OldBase;
if (NewBaseGEP->getType() != I8PtrTy)
NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
NewBaseGEP =
NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
NewGEPBases.insert(NewBaseGEP);
createNewBase(BaseOffset, OldBase, GEP);
}

IRBuilder<> Builder(GEP);
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16070,6 +16070,20 @@ bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
AM.Scale);
}

// Suggest a common base for large GEP offsets in [MinOffset, MaxOffset].
// When both offsets agree in every bit above the low 12 bits, and MinOffset
// with its low 12 bits cleared is a legal add immediate, that cleared value is
// returned as the base. The remaining per-access offset is then at most 12
// bits wide. Otherwise 0 is returned.
int64_t
AArch64TargetLowering::getPreferredLargeGEPBaseOffset(int64_t MinOffset,
                                                      int64_t MaxOffset) const {
  // The two offsets must share their high part, otherwise there is no single
  // base that leaves only a 12-bit remainder for both.
  const bool SameHighPart = (MinOffset >> 12) == (MaxOffset >> 12);
  if (!SameHighPart)
    return 0;

  // Rebase the value to an integer multiple of imm12.
  const int64_t RebasedOffset = MinOffset & ~0xfffULL;
  return isLegalAddImmediate(RebasedOffset) ? RebasedOffset : 0;
}

bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
// Consider splitting large offset of struct or array.
return true;
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,9 @@ class AArch64TargetLowering : public TargetLowering {
unsigned AS,
Instruction *I = nullptr) const override;

/// Return \p MinOffset with its low 12 bits cleared when \p MinOffset and
/// \p MaxOffset agree in every bit above the low 12 bits and that value is a
/// legal add immediate; otherwise return 0.
int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
int64_t MaxOffset) const override;

/// Return true if an FMA operation is faster than a pair of fmul and fadd
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AArch64/arm64-addrmode.ll
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,8 @@ define i64 @LdOffset_i64_multi_offset(ptr %a) {
; CHECK-LABEL: LdOffset_i64_multi_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: add x8, x0, #2031, lsl #12 // =8318976
; CHECK-NEXT: add x8, x8, #960
; CHECK-NEXT: ldr x9, [x8]
; CHECK-NEXT: ldr x8, [x8, #2056]
; CHECK-NEXT: ldr x9, [x8, #960]
; CHECK-NEXT: ldr x8, [x8, #3016]
; CHECK-NEXT: add x0, x8, x9
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
Expand Down
57 changes: 25 additions & 32 deletions llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,17 @@
define void @test1(ptr %s, i32 %n) {
; CHECK-LABEL: test1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr x9, [x0]
; CHECK-NEXT: mov w10, #40000 // =0x9c40
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: add x9, x9, x10
; CHECK-NEXT: cmp w8, w1
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: mov w9, wzr
; CHECK-NEXT: add x8, x8, #9, lsl #12 // =36864
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.ge .LBB0_2
; CHECK-NEXT: .LBB0_1: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str w8, [x9, #4]
; CHECK-NEXT: add w8, w8, #1
; CHECK-NEXT: str w8, [x9]
; CHECK-NEXT: cmp w8, w1
; CHECK-NEXT: str w9, [x8, #3140]
; CHECK-NEXT: add w9, w9, #1
; CHECK-NEXT: str w9, [x8, #3136]
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.lt .LBB0_1
; CHECK-NEXT: .LBB0_2: // %while_end
; CHECK-NEXT: ret
Expand Down Expand Up @@ -47,16 +46,15 @@ define void @test2(ptr %struct, i32 %n) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cbz x0, .LBB1_3
; CHECK-NEXT: // %bb.1: // %while_cond.preheader
; CHECK-NEXT: mov w8, #40000 // =0x9c40
; CHECK-NEXT: mov w9, wzr
; CHECK-NEXT: add x8, x0, x8
; CHECK-NEXT: add x8, x0, #9, lsl #12 // =36864
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.ge .LBB1_3
; CHECK-NEXT: .LBB1_2: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str w9, [x8, #4]
; CHECK-NEXT: str w9, [x8, #3140]
; CHECK-NEXT: add w9, w9, #1
; CHECK-NEXT: str w9, [x8]
; CHECK-NEXT: str w9, [x8, #3136]
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.lt .LBB1_2
; CHECK-NEXT: .LBB1_3: // %while_end
Expand Down Expand Up @@ -89,16 +87,15 @@ define void @test3(ptr %s1, ptr %s2, i1 %cond, i32 %n) {
; CHECK-NEXT: csel x8, x1, x0, ne
; CHECK-NEXT: cbz x8, .LBB2_3
; CHECK-NEXT: // %bb.1: // %while_cond.preheader
; CHECK-NEXT: mov w10, #40000 // =0x9c40
; CHECK-NEXT: mov w9, wzr
; CHECK-NEXT: add x8, x8, x10
; CHECK-NEXT: add x8, x8, #9, lsl #12 // =36864
; CHECK-NEXT: cmp w9, w3
; CHECK-NEXT: b.ge .LBB2_3
; CHECK-NEXT: .LBB2_2: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str w9, [x8, #4]
; CHECK-NEXT: str w9, [x8, #3140]
; CHECK-NEXT: add w9, w9, #1
; CHECK-NEXT: str w9, [x8]
; CHECK-NEXT: str w9, [x8, #3136]
; CHECK-NEXT: cmp w9, w3
; CHECK-NEXT: b.lt .LBB2_2
; CHECK-NEXT: .LBB2_3: // %while_end
Expand Down Expand Up @@ -141,41 +138,38 @@ define void @test4(i32 %n) uwtable personality ptr @__FrameHandler {
; CHECK-NEXT: .cfi_personality 156, DW.ref.__FrameHandler
; CHECK-NEXT: .cfi_lsda 28, .Lexception0
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: .cfi_remember_state
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: mov w21, wzr
; CHECK-NEXT: mov w20, #40000 // =0x9c40
; CHECK-NEXT: mov w20, wzr
; CHECK-NEXT: .LBB3_1: // %while_cond
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: bl foo
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: // %bb.2: // %while_cond_x.split
; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1
; CHECK-NEXT: add x8, x0, x20
; CHECK-NEXT: cmp w21, w19
; CHECK-NEXT: str wzr, [x8]
; CHECK-NEXT: add x8, x0, #9, lsl #12 // =36864
; CHECK-NEXT: cmp w20, w19
; CHECK-NEXT: str wzr, [x8, #3136]
; CHECK-NEXT: b.ge .LBB3_4
; CHECK-NEXT: // %bb.3: // %while_body
; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1
; CHECK-NEXT: str w21, [x8, #4]
; CHECK-NEXT: add w21, w21, #1
; CHECK-NEXT: str w21, [x8]
; CHECK-NEXT: str w20, [x8, #3140]
; CHECK-NEXT: add w20, w20, #1
; CHECK-NEXT: str w20, [x8, #3136]
; CHECK-NEXT: b .LBB3_1
; CHECK-NEXT: .LBB3_4: // %while_end
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
; CHECK-NEXT: .cfi_restore w20
; CHECK-NEXT: .cfi_restore w21
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB3_5: // %cleanup
Expand Down Expand Up @@ -223,14 +217,13 @@ define void @test5(ptr %s, i32 %n) {
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: mov w9, wzr
; CHECK-NEXT: add x8, x8, #19, lsl #12 // =77824
; CHECK-NEXT: add x8, x8, #2176
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.ge .LBB4_2
; CHECK-NEXT: .LBB4_1: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str w9, [x8, #4]
; CHECK-NEXT: str w9, [x8, #2180]
; CHECK-NEXT: add w9, w9, #1
; CHECK-NEXT: str w9, [x8]
; CHECK-NEXT: str w9, [x8, #2176]
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.lt .LBB4_1
; CHECK-NEXT: .LBB4_2: // %while_end
Expand Down

0 comments on commit d6f4d52

Please sign in to comment.