[SeparateConstOffsetFromGEP] Handle or disjoint flags (#76997)
This commit extends separate-const-offset-from-gep to look at the
newly-added `disjoint` flag on `or` instructions so as to preserve
additional opportunities for optimization.

The tests were pre-committed in #76972.
krzysz00 authored Jan 26, 2024
1 parent a437347 commit 63fe80f
Showing 11 changed files with 59 additions and 62 deletions.
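
As a quick illustration of what the change enables, here is a minimal IR sketch distilled from the `testDisjointOrSplits` test updated below (the function name `@example` and the external `@foo` are placeholders, not part of the commit):

```llvm
declare i64 @foo()

define void @example(ptr %p) {
  %var = tail call i64 @foo()
  ; Because the `or` carries the `disjoint` flag, it is equivalent to an add,
  ; so SeparateConstOffsetFromGEP can now peel the constant 10 out of the GEP
  ; index and reassociate it as a separate pointer adjustment.
  %off = or disjoint i64 %var, 10
  %q = getelementptr i8, ptr %p, i64 %off
  store i8 0, ptr %q, align 1
  ret void
}
```

Without the flag, the pass now leaves such GEPs untouched, as exercised by `testNoBitsInCommonOrDoesntSplit` below.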
34 changes: 14 additions & 20 deletions llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -174,6 +174,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -235,18 +236,16 @@ class ConstantOffsetExtractor {
/// \p UserChainTail Outputs the tail of UserChain so that we can
/// garbage-collect unused instructions in UserChain.
static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
User *&UserChainTail, const DominatorTree *DT);
User *&UserChainTail);

/// Looks for a constant offset from the given GEP index without extracting
/// it. It returns the numeric value of the extracted constant offset (0 if
/// failed). The meaning of the arguments are the same as Extract.
static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
const DominatorTree *DT);
static int64_t Find(Value *Idx, GetElementPtrInst *GEP);

private:
ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
: IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) {
}
ConstantOffsetExtractor(Instruction *InsertionPt)
: IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()) {}

/// Searches the expression that computes V for a non-zero constant C s.t.
/// V can be reassociated into the form V' + C. If the searching is
@@ -336,7 +335,6 @@ class ConstantOffsetExtractor {
Instruction *IP;

const DataLayout &DL;
const DominatorTree *DT;
};

/// A pass that tries to split every GEP in the function into a variadic
@@ -519,12 +517,10 @@ bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
}

Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
// Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
// don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
// FIXME: this does not appear to be covered by any tests
// (with x86/aarch64 backends at least)
// Do not trace into "or" unless it is equivalent to "add".
// This is the case if the or's disjoint flag is set.
if (BO->getOpcode() == Instruction::Or &&
!haveNoCommonBitsSet(LHS, RHS, SimplifyQuery(DL, DT, /*AC*/ nullptr, BO)))
!cast<PossiblyDisjointInst>(BO)->isDisjoint())
return false;

// FIXME: We don't currently support constants from the RHS of subs,
@@ -778,9 +774,8 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
}

Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
User *&UserChainTail,
const DominatorTree *DT) {
ConstantOffsetExtractor Extractor(GEP, DT);
User *&UserChainTail) {
ConstantOffsetExtractor Extractor(GEP);
// Find a non-zero constant offset first.
APInt ConstantOffset =
Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
@@ -795,10 +790,9 @@ Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
return IdxWithoutConstOffset;
}

int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
const DominatorTree *DT) {
int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP) {
// If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
return ConstantOffsetExtractor(GEP, DT)
return ConstantOffsetExtractor(GEP)
.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
GEP->isInBounds())
.getSExtValue();
@@ -836,7 +830,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,

// Tries to extract a constant offset from this GEP index.
int64_t ConstantOffset =
ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP);
if (ConstantOffset != 0) {
NeedsExtraction = true;
// A GEP may have multiple indices. We accumulate the extracted
@@ -1026,7 +1020,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
Value *OldIdx = GEP->getOperand(I);
User *UserChainTail;
Value *NewIdx =
ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT);
ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail);
if (NewIdx != nullptr) {
// Switches to the index with the constant offset removed.
GEP->setOperand(I, NewIdx);
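For contrast, a hedged sketch of the case the new check intentionally stops handling, mirroring `testNoBitsInCommonOrDoesntSplit` further down (`@example2` and `@foo` are illustrative names only): the operands provably share no bits, which the removed `haveNoCommonBitsSet` call could have proven, but the `or` is not annotated `disjoint`, so the pass no longer splits it.

```llvm
declare i64 @foo()

define void @example2(ptr %p) {
  %var = tail call i64 @foo()
  ; %var_high has its low four bits cleared, so it shares no bits with 10,
  ; yet without the `disjoint` annotation the pass now leaves this GEP alone.
  %var_high = and i64 %var, -16
  %off = or i64 %var_high, 10
  %q = getelementptr i8, ptr %p, i64 %off
  store i8 0, ptr %q, align 1
  ret void
}
```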
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll
@@ -19,17 +19,17 @@ define amdgpu_cs void @test1(i32 %arg1, <4 x i32> inreg %arg2, i32, ptr addrspac
%ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
call void @llvm.amdgcn.raw.buffer.store.i32(i32 11, <4 x i32> %arg2, i32 %ad1, i32 0, i32 0)

%bs2 = or i32 %bs1, 1
%bs2 = or disjoint i32 %bs1, 1
%ep2 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs2
%ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
call void @llvm.amdgcn.raw.buffer.store.i32(i32 22, <4 x i32> %arg2, i32 %ad2, i32 0, i32 0)

%bs3 = or i32 %bs1, 2
%bs3 = or disjoint i32 %bs1, 2
%ep3 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs3
%ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
call void @llvm.amdgcn.raw.buffer.store.i32(i32 33, <4 x i32> %arg2, i32 %ad3, i32 0, i32 0)

%bs4 = or i32 %bs1, 3
%bs4 = or disjoint i32 %bs1, 3
%ep4 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs4
%ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
call void @llvm.amdgcn.raw.buffer.store.i32(i32 44, <4 x i32> %arg2, i32 %ad4, i32 0, i32 0)
@@ -55,17 +55,17 @@ define amdgpu_cs void @test1_ptr(i32 %arg1, ptr addrspace(8) inreg %arg2, i32, p
%ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 11, ptr addrspace(8) %arg2, i32 %ad1, i32 0, i32 0)

%bs2 = or i32 %bs1, 1
%bs2 = or disjoint i32 %bs1, 1
%ep2 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs2
%ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 22, ptr addrspace(8) %arg2, i32 %ad2, i32 0, i32 0)

%bs3 = or i32 %bs1, 2
%bs3 = or disjoint i32 %bs1, 2
%ep3 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs3
%ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 33, ptr addrspace(8) %arg2, i32 %ad3, i32 0, i32 0)

%bs4 = or i32 %bs1, 3
%bs4 = or disjoint i32 %bs1, 3
%ep4 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs4
%ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 44, ptr addrspace(8) %arg2, i32 %ad4, i32 0, i32 0)
@@ -90,17 +90,17 @@ define amdgpu_cs void @test2(i32 %arg1, <4 x i32> inreg %arg2) {
%ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
call void @llvm.amdgcn.raw.buffer.store.i32(i32 11, <4 x i32> %arg2, i32 %ad1, i32 0, i32 0)

%bs2 = or i32 %bs1, 1
%bs2 = or disjoint i32 %bs1, 1
%ep2 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs2
%ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
call void @llvm.amdgcn.raw.buffer.store.i32(i32 22, <4 x i32> %arg2, i32 %ad2, i32 0, i32 0)

%bs3 = or i32 %bs1, 2
%bs3 = or disjoint i32 %bs1, 2
%ep3 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs3
%ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
call void @llvm.amdgcn.raw.buffer.store.i32(i32 33, <4 x i32> %arg2, i32 %ad3, i32 0, i32 0)

%bs4 = or i32 %bs1, 3
%bs4 = or disjoint i32 %bs1, 3
%ep4 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs4
%ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
call void @llvm.amdgcn.raw.buffer.store.i32(i32 44, <4 x i32> %arg2, i32 %ad4, i32 0, i32 0)
@@ -125,17 +125,17 @@ define amdgpu_cs void @test2_ptr(i32 %arg1, ptr addrspace(8) inreg %arg2) {
%ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 11, ptr addrspace(8) %arg2, i32 %ad1, i32 0, i32 0)

%bs2 = or i32 %bs1, 1
%bs2 = or disjoint i32 %bs1, 1
%ep2 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs2
%ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 22, ptr addrspace(8) %arg2, i32 %ad2, i32 0, i32 0)

%bs3 = or i32 %bs1, 2
%bs3 = or disjoint i32 %bs1, 2
%ep3 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs3
%ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 33, ptr addrspace(8) %arg2, i32 %ad3, i32 0, i32 0)

%bs4 = or i32 %bs1, 3
%bs4 = or disjoint i32 %bs1, 3
%ep4 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs4
%ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 44, ptr addrspace(8) %arg2, i32 %ad4, i32 0, i32 0)
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
@@ -238,7 +238,7 @@ main_body:
%25 = getelementptr inbounds [0 x <8 x i32>], ptr addrspace(6) %1, i32 0, i32 %24, !amdgpu.uniform !0
%26 = load <8 x i32>, ptr addrspace(6) %25, align 32, !invariant.load !0
%27 = shl i32 %23, 2
%28 = or i32 %27, 3
%28 = or disjoint i32 %27, 3
%29 = getelementptr inbounds [0 x <4 x i32>], ptr addrspace(6) %1, i32 0, i32 %28, !amdgpu.uniform !0
%30 = load <4 x i32>, ptr addrspace(6) %29, align 16, !invariant.load !0
%31 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %30, i1 0, i32 0, i32 0) #8
@@ -270,7 +270,7 @@ main_body:
%25 = getelementptr inbounds [0 x <8 x i32>], ptr addrspace(6) %1, i32 0, i32 %24
%26 = load <8 x i32>, ptr addrspace(6) %25, align 32, !invariant.load !0
%27 = shl i32 %23, 2
%28 = or i32 %27, 3
%28 = or disjoint i32 %27, 3
%29 = getelementptr inbounds [0 x <4 x i32>], ptr addrspace(6) %1, i32 0, i32 %28
%30 = load <4 x i32>, ptr addrspace(6) %29, align 16, !invariant.load !0
%31 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %30, i1 0, i32 0, i32 0) #8
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
@@ -1157,38 +1157,38 @@ define <8 x i16> @large_vector(ptr addrspace(3) %p, i32 %idxp) {
; GFX11-NEXT: s_setpc_b64 s[30:31]
%idx = shl i32 %idxp, 4

%i.0 = or i32 %idx, 0
%i.0 = or disjoint i32 %idx, 0
%p.0 = getelementptr half, ptr addrspace(3) %p, i32 %i.0
%x.0 = load i16, ptr addrspace(3) %p.0, align 4
%v0p = insertelement <8 x i16> poison, i16 %x.0, i32 0
%i.1 = or i32 %idx, 1
%i.1 = or disjoint i32 %idx, 1
%p.1 = getelementptr half, ptr addrspace(3) %p, i32 %i.1
%x.1 = load i16, ptr addrspace(3) %p.1, align 2
%v0 = insertelement <8 x i16> %v0p, i16 %x.1, i32 1

%i.2 = or i32 %idx, 2
%i.2 = or disjoint i32 %idx, 2
%p.2 = getelementptr half, ptr addrspace(3) %p, i32 %i.2
%x.2 = load i16, ptr addrspace(3) %p.2, align 4
%v1p = insertelement <8 x i16> poison, i16 %x.2, i32 0
%i.3 = or i32 %idx, 3
%i.3 = or disjoint i32 %idx, 3
%p.3 = getelementptr half, ptr addrspace(3) %p, i32 %i.3
%x.3 = load i16, ptr addrspace(3) %p.3, align 2
%v1 = insertelement <8 x i16> %v1p, i16 %x.3, i32 1

%i.4 = or i32 %idx, 4
%i.4 = or disjoint i32 %idx, 4
%p.4 = getelementptr half, ptr addrspace(3) %p, i32 %i.4
%x.4 = load i16, ptr addrspace(3) %p.4, align 4
%v2p = insertelement <8 x i16> poison, i16 %x.4, i32 0
%i.5 = or i32 %idx, 5
%i.5 = or disjoint i32 %idx, 5
%p.5 = getelementptr half, ptr addrspace(3) %p, i32 %i.5
%x.5 = load i16, ptr addrspace(3) %p.5, align 2
%v2 = insertelement <8 x i16> %v2p, i16 %x.5, i32 1

%i.6 = or i32 %idx, 6
%i.6 = or disjoint i32 %idx, 6
%p.6 = getelementptr half, ptr addrspace(3) %p, i32 %i.6
%x.6 = load i16, ptr addrspace(3) %p.6, align 4
%v3p = insertelement <8 x i16> poison, i16 %x.6, i32 0
%i.7 = or i32 %idx, 7
%i.7 = or disjoint i32 %idx, 7
%p.7 = getelementptr half, ptr addrspace(3) %p, i32 %i.7
%x.7 = load i16, ptr addrspace(3) %p.7, align 2
%v3 = insertelement <8 x i16> %v3p, i16 %x.7, i32 1
@@ -732,7 +732,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
%192 = and i64 %191, 4294967168
%193 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 %192
%194 = shl nuw nsw i32 %178, 5
%195 = or i32 %194, 8
%195 = or disjoint i32 %194, 8
%196 = zext i32 %195 to i64
%197 = getelementptr inbounds i8, ptr addrspace(1) %193, i64 %196
%198 = getelementptr inbounds i8, ptr addrspace(1) %197, i64 -4
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/NVPTX/vector-loads.ll
@@ -78,11 +78,11 @@ define void @foo_complex(ptr nocapture readonly align 16 dereferenceable(1342177
%t3 = shl nuw nsw i32 %t1, 9
%ttile_origin.2 = and i32 %t3, 130560
%tstart_offset_x_mul = shl nuw nsw i32 %t0, 1
%t4 = or i32 %ttile_origin.2, %tstart_offset_x_mul
%t6 = or i32 %t4, 1
%t8 = or i32 %t4, 128
%t4 = or disjoint i32 %ttile_origin.2, %tstart_offset_x_mul
%t6 = or disjoint i32 %t4, 1
%t8 = or disjoint i32 %t4, 128
%t9 = zext i32 %t8 to i64
%t10 = or i32 %t4, 129
%t10 = or disjoint i32 %t4, 129
%t11 = zext i32 %t10 to i64
%t20 = zext i32 %t2 to i64
%t27 = getelementptr inbounds [1024 x [131072 x i8]], ptr %alloc0, i64 0, i64 %t20, i64 %t9
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -496,7 +496,7 @@ for.body: ; preds = %for.body, %for.body
%idxprom = zext i32 %mul to i64
%arrayidx = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom
%4 = load <16 x i8>, ptr %arrayidx, align 16
%add2 = or i32 %mul, 1
%add2 = or disjoint i32 %mul, 1
%idxprom3 = zext i32 %add2 to i64
%arrayidx4 = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom3
%5 = load <16 x i8>, ptr %arrayidx4, align 16
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/PowerPC/sched-addi.ll
@@ -99,7 +99,7 @@ entry:

vector.body:
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
%offset.idx = or i64 %index, 1
%offset.idx = or disjoint i64 %index, 1
%0 = getelementptr %_elem_type_of_x, ptr %x_rvo_based_addr_3, i64 %offset.idx, i32 0
%1 = getelementptr %_elem_type_of_a, ptr %a_rvo_based_addr_5, i64 %offset.idx, i32 0
%wide.load = load <4 x double>, ptr %1, align 8
@@ -157,7 +157,7 @@ main_body:
%25 = getelementptr [0 x <8 x i32>], ptr addrspace(4) %1, i32 0, i32 %24, !amdgpu.uniform !0
%26 = load <8 x i32>, ptr addrspace(4) %25, align 32, !invariant.load !0
%27 = shl i32 %23, 2
%28 = or i32 %27, 3
%28 = or disjoint i32 %27, 3
%29 = getelementptr [0 x <4 x i32>], ptr addrspace(4) %1, i32 0, i32 %28, !amdgpu.uniform !0
%30 = load <4 x i32>, ptr addrspace(4) %29, align 16, !invariant.load !0
%31 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %30, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
@@ -142,7 +142,7 @@ define ptr @sext_or(i64 %a, i32 %b) {
;
entry:
%b1 = shl i32 %b, 2
%b2 = or i32 %b1, 1 ; (b << 2) and 1 have no common bits
%b2 = or disjoint i32 %b1, 1 ; (b << 2) and 1 have no common bits
%b3 = or i32 %b1, 4 ; (b << 2) and 4 may have common bits
%b2.ext = zext i32 %b2 to i64
%b3.ext = sext i32 %b3 to i64
@@ -335,7 +335,7 @@ define ptr @shl_add_or(i64 %a, ptr %ptr) {
entry:
%shl = shl i64 %a, 2
%add = add i64 %shl, 12
%or = or i64 %add, 1
%or = or disjoint i64 %add, 1
; ((a << 2) + 12) and 1 have no common bits. Therefore,
; SeparateConstOffsetFromGEP is able to extract the 12.
; TODO(jingyue): We could reassociate the expression to combine 12 and 1.
@@ -22,16 +22,17 @@ define void @testOrDoesntSplit(ptr %p) {
ret void
}

define void @testNoBitsInCommonOrSplits(ptr %p) {
; CHECK-LABEL: define void @testNoBitsInCommonOrSplits(
; COM: The check for `or disjoint` removed the old hasNoBitsInCommon()
; COM: check, ensure that failing to annotate an or with disjoint makes
; COM: the optimization fail.
define void @testNoBitsInCommonOrDoesntSplit(ptr %p) {
; CHECK-LABEL: define void @testNoBitsInCommonOrDoesntSplit(
; CHECK-SAME: ptr [[P:%.*]]) {
; CHECK-NEXT: [[VAR:%.*]] = tail call i64 @foo()
; CHECK-NEXT: [[VAR_HIGH:%.*]] = and i64 [[VAR]], -16
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[VAR_HIGH]]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 10
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
; CHECK-NEXT: store i8 0, ptr [[TMP4]], align 1
; CHECK-NEXT: [[OFF:%.*]] = or i64 [[VAR_HIGH]], 10
; CHECK-NEXT: [[Q:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFF]]
; CHECK-NEXT: store i8 0, ptr [[Q]], align 1
; CHECK-NEXT: ret void
;
%var = tail call i64 @foo()
@@ -46,9 +47,11 @@ define void @testDisjointOrSplits(ptr %p) {
; CHECK-LABEL: define void @testDisjointOrSplits(
; CHECK-SAME: ptr [[P:%.*]]) {
; CHECK-NEXT: [[VAR:%.*]] = tail call i64 @foo()
; CHECK-NEXT: [[OFF:%.*]] = or disjoint i64 [[VAR]], 10
; CHECK-NEXT: [[Q:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFF]]
; CHECK-NEXT: store i8 0, ptr [[Q]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[VAR]]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 10
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
; CHECK-NEXT: store i8 0, ptr [[TMP4]], align 1
; CHECK-NEXT: ret void
;
%var = tail call i64 @foo()
