diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 67b7ec3ae3c9eca..bded45cf6b02a7f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3207,12 +3207,13 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       // TODO: apply range metadata for range check patterns?
     }
 
-    // Separate storage assumptions apply to the underlying allocations, not any
-    // particular pointer within them. When evaluating the hints for AA purposes
-    // we getUnderlyingObject them; by precomputing the answers here we can
-    // avoid having to do so repeatedly there.
     for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
       OperandBundleUse OBU = II->getOperandBundleAt(Idx);
+
+      // Separate storage assumptions apply to the underlying allocations, not
+      // any particular pointer within them. When evaluating the hints for AA
+      // purposes we getUnderlyingObject them; by precomputing the answers here
+      // we can avoid having to do so repeatedly there.
       if (OBU.getTagName() == "separate_storage") {
         assert(OBU.Inputs.size() == 2);
         auto MaybeSimplifyHint = [&](const Use &U) {
@@ -3226,6 +3227,27 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
         MaybeSimplifyHint(OBU.Inputs[0]);
         MaybeSimplifyHint(OBU.Inputs[1]);
       }
+      // Try to fold alignment assumption into a load's !align metadata, if the
+      // assumption is valid in the load's context.
+      if (OBU.getTagName() == "align" && OBU.Inputs.size() == 2) {
+        RetainedKnowledge RK = getKnowledgeFromBundle(
+            *cast<AssumeInst>(II), II->bundle_op_info_begin()[Idx]);
+        if (!RK || RK.AttrKind != Attribute::Alignment ||
+            !isPowerOf2_64(RK.ArgValue))
+          continue;
+
+        auto *LI = dyn_cast<LoadInst>(OBU.Inputs[0]);
+        if (!LI ||
+            !isValidAssumeForContext(II, LI, &DT, /*AllowEphemerals=*/true))
+          continue;
+
+        LI->setMetadata(
+            LLVMContext::MD_align,
+            MDNode::get(II->getContext(), ValueAsMetadata::getConstant(
+                                              Builder.getInt64(RK.ArgValue))));
+        auto *New = CallBase::removeOperandBundle(II, OBU.getTagID());
+        return New;
+      }
     }
 
     // Convert nonnull assume like:
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 3a0ae6b01a1144f..aeb98d3204caf8f 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -31,6 +32,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
@@ -1599,6 +1601,67 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
         if (InVal.IsLoad)
           if (auto *I = dyn_cast<Instruction>(Op))
             combineMetadataForCSE(I, &Inst, false);
+
+        // If the load has align and noundef metadata, preserve it via an
+        // alignment assumption. Note that this doesn't use salvageKnowledge,
+        // as we need to create the assumption for the value we replaced the
+        // load with.
+        if (auto *AlignMD = Inst.getMetadata(LLVMContext::MD_align)) {
+          if (Inst.hasMetadata(LLVMContext::MD_noundef) ||
+              programUndefinedIfPoison(&Inst)) {
+            Inst.setMetadata(LLVMContext::MD_align, nullptr);
+            auto *B = mdconst::extract<ConstantInt>(AlignMD->getOperand(0));
+            auto KB = computeKnownBits(Op, SQ.DL);
+            unsigned AlignFromKB = 1 << KB.countMinTrailingZeros();
+            if (AlignFromKB < B->getZExtValue()) {
+              SetVector<const Instruction *> WorkList;
+              bool AlignNeeded = false;
+              for (const User *U : Inst.users())
+                if (auto *I = dyn_cast<Instruction>(U))
+                  WorkList.insert(I);
+
+              for (unsigned I = 0; I != WorkList.size(); ++I) {
+                auto *Curr = WorkList[I];
+                if (auto *LI = dyn_cast<LoadInst>(Curr)) {
+                  if (LI->getAlign().value() < B->getZExtValue()) {
+                    AlignNeeded = true;
+                    break;
+                  }
+                  continue;
+                }
+                if (auto *SI = dyn_cast<StoreInst>(Curr)) {
+                  if (SI->getAlign().value() < B->getZExtValue()) {
+                    AlignNeeded = true;
+                    break;
+                  }
+                  continue;
+                }
+                if (isa<CallBase>(Curr)) {
+                  AlignNeeded = true;
+                  break;
+                }
+                if (isa<ICmpInst>(Curr) &&
+                    !isa<Constant>(cast<ICmpInst>(Curr)->getOperand(0)) &&
+                    !isa<Constant>(cast<ICmpInst>(Curr)->getOperand(1))) {
+                  AlignNeeded = true;
+                  break;
+                }
+                if (WorkList.size() > 16) {
+                  AlignNeeded = true;
+                  break;
+                }
+
+                for (const User *U : Curr->users())
+                  WorkList.insert(cast<Instruction>(U));
+              }
+              if (AlignNeeded) {
+                IRBuilder<> Builder(&Inst);
+                Builder.CreateAlignmentAssumption(SQ.DL, Op, B);
+              }
+            }
+          }
+        }
+
         if (!Inst.use_empty())
           Inst.replaceAllUsesWith(Op);
         salvageKnowledge(&Inst, &AC);
diff --git a/llvm/test/Transforms/EarlyCSE/materialize-align-assumptions.ll b/llvm/test/Transforms/EarlyCSE/materialize-align-assumptions.ll
index ea63376957162b1..837a73a00d64310 100644
--- a/llvm/test/Transforms/EarlyCSE/materialize-align-assumptions.ll
+++ b/llvm/test/Transforms/EarlyCSE/materialize-align-assumptions.ll
@@ -3,6 +3,24 @@
 
 declare void @foo(ptr)
 
+define ptr @align_replacement_does_not_have_align_metadata_missing_noundef(ptr noalias %p) {
+; CHECK-LABEL: define ptr @align_replacement_does_not_have_align_metadata_missing_noundef(
+; CHECK-SAME: ptr noalias [[P:%.*]]) {
+; CHECK-NEXT:    [[L_1:%.*]] = load ptr, ptr [[P]], align 8
+; CHECK-NEXT:    call void @foo(ptr [[L_1]])
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[L_1]], i64 4
+; CHECK-NEXT:    store ptr [[GEP]], ptr [[P]], align 8
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+  %l.1 = load ptr, ptr %p, align 8
+  call void @foo(ptr %l.1)
+  %l.2 = load ptr, ptr %p, align 8
+  %gep = getelementptr i8, ptr %l.2, i64 4
+  store ptr %gep, ptr %p, align 8
+  %l.3 = load ptr, ptr %p, align 8, !align !0
+  ret ptr %l.3
+}
+
 define ptr @align_replacement_does_not_have_align_metadata(ptr noalias %p) {
 ; CHECK-LABEL: define ptr @align_replacement_does_not_have_align_metadata(
 ; CHECK-SAME: ptr noalias [[P:%.*]]) {
@@ -10,6 +28,7 @@ define ptr @align_replacement_does_not_have_align_metadata(ptr noalias %p) {
 ; CHECK-NEXT:    call void @foo(ptr [[L_1]])
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[L_1]], i64 4
 ; CHECK-NEXT:    store ptr [[GEP]], ptr [[P]], align 8
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[GEP]], i64 4) ]
 ; CHECK-NEXT:    ret ptr [[GEP]]
 ;
   %l.1 = load ptr, ptr %p, align 8
@@ -17,7 +36,7 @@ define ptr @align_replacement_does_not_have_align_metadata(ptr noalias %p) {
   %l.2 = load ptr, ptr %p, align 8
   %gep = getelementptr i8, ptr %l.2, i64 4
   store ptr %gep, ptr %p, align 8
-  %l.3 = load ptr, ptr %p, align 8, !align !0
+  %l.3 = load ptr, ptr %p, align 8, !align !0, !noundef !{}
   ret ptr %l.3
 }
 
@@ -27,12 +46,13 @@ define ptr @align_replacement_does_not_have_align_metadata2(ptr noalias %p) {
 ; CHECK-NEXT:    [[L_1:%.*]] = load ptr, ptr [[P]], align 8
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[L_1]], i64 4
 ; CHECK-NEXT:    store ptr [[GEP]], ptr [[P]], align 8
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[GEP]], i64 4) ]
 ; CHECK-NEXT:    ret ptr [[GEP]]
 ;
   %l.1 = load ptr, ptr %p, align 8
   %gep = getelementptr i8, ptr %l.1, i64 4
   store ptr %gep, ptr %p, align 8
-  %l.2 = load ptr, ptr %p, align 8, !align !0
+  %l.2 = load ptr, ptr %p, align 8, !align !0, !noundef !{}
   ret ptr %l.2
 }
 
@@ -54,11 +74,12 @@ define ptr @align_replacement_has_smaller_alignment(ptr noalias %p) {
 ; CHECK-SAME: ptr noalias [[P:%.*]]) {
 ; CHECK-NEXT:    [[L_1:%.*]] = load ptr, ptr [[P]], align 8, !align [[META0]]
 ; CHECK-NEXT:    call void @foo(ptr [[L_1]])
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[L_1]], i64 8) ]
 ; CHECK-NEXT:    ret ptr [[L_1]]
 ;
   %l.1 = load ptr, ptr %p, align 8, !align !0
   call void @foo(ptr %l.1)
-  %l.2 = load ptr, ptr %p, align 8, !align !1
+  %l.2 = load ptr, ptr %p, align 8, !align !1, !noundef !{}
   ret ptr %l.2
 }
 
@@ -67,12 +88,12 @@ define ptr @align_replacement_has_larger_alignment(ptr %p) {
 ; CHECK-SAME: ptr [[P:%.*]]) {
 ; CHECK-NEXT:    [[L_1:%.*]] = load ptr, ptr [[P]], align 8, !align [[META1:![0-9]+]]
 ; CHECK-NEXT:    call void @foo(ptr [[L_1]])
-; CHECK-NEXT:    [[L_2:%.*]] = load ptr, ptr [[P]], align 8, !align [[META0]]
+; CHECK-NEXT:    [[L_2:%.*]] = load ptr, ptr [[P]], align 8, !align [[META0]], !noundef [[META2:![0-9]+]]
 ; CHECK-NEXT:    ret ptr [[L_2]]
 ;
   %l.1 = load ptr, ptr %p, align 8, !align !1
   call void @foo(ptr %l.1)
-  %l.2 = load ptr, ptr %p, align 8, !align !0
+  %l.2 = load ptr, ptr %p, align 8, !align !0, !noundef !{}
   ret ptr %l.2
 }
 
@@ -81,12 +102,12 @@ define ptr @align_1(ptr %p) {
 ; CHECK-SAME: ptr [[P:%.*]]) {
 ; CHECK-NEXT:    [[L_1:%.*]] = load ptr, ptr [[P]], align 8
 ; CHECK-NEXT:    call void @foo(ptr [[L_1]])
-; CHECK-NEXT:    [[L_2:%.*]] = load ptr, ptr [[P]], align 8, !align [[META2:![0-9]+]]
+; CHECK-NEXT:    [[L_2:%.*]] = load ptr, ptr [[P]], align 8, !align [[META3:![0-9]+]], !noundef [[META2]]
 ; CHECK-NEXT:    ret ptr [[L_2]]
 ;
   %l.1 = load ptr, ptr %p, align 8
   call void @foo(ptr %l.1)
-  %l.2 = load ptr, ptr %p, align 8, !align !2
+  %l.2 = load ptr, ptr %p, align 8, !align !2, !noundef !{}
   ret ptr %l.2
 }
 
@@ -96,5 +117,6 @@ define ptr @align_1(ptr %p) {
 ;.
 ; CHECK: [[META0]] = !{i64 4}
 ; CHECK: [[META1]] = !{i64 8}
-; CHECK: [[META2]] = !{i64 1}
+; CHECK: [[META2]] = !{}
+; CHECK: [[META3]] = !{i64 1}
 ;.
diff --git a/llvm/test/Transforms/InstCombine/assume-align.ll b/llvm/test/Transforms/InstCombine/assume-align.ll
index 47659ff8c84909d..549821802fe674b 100644
--- a/llvm/test/Transforms/InstCombine/assume-align.ll
+++ b/llvm/test/Transforms/InstCombine/assume-align.ll
@@ -123,11 +123,9 @@ define i8 @assume_align_non_pow2(ptr %p) {
   ret i8 %v
 }
 
-; TODO: Can fold alignment assumption into !align metadata on load.
 define ptr @fold_assume_align_pow2_of_loaded_pointer_into_align_metadata(ptr %p) {
 ; CHECK-LABEL: @fold_assume_align_pow2_of_loaded_pointer_into_align_metadata(
-; CHECK-NEXT:    [[P2:%.*]] = load ptr, ptr [[P:%.*]], align 8
-; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 8) ]
+; CHECK-NEXT:    [[P2:%.*]] = load ptr, ptr [[P:%.*]], align 8, !align [[META0:![0-9]+]]
 ; CHECK-NEXT:    ret ptr [[P2]]
 ;
   %p2 = load ptr, ptr %p
@@ -135,6 +133,16 @@ define ptr @fold_assume_align_pow2_of_loaded_pointer_into_align_metadata(ptr %p)
   ret ptr %p2
 }
 
+define ptr @fold_assume_align_i32_pow2_of_loaded_pointer_into_align_metadata(ptr %p) {
+; CHECK-LABEL: @fold_assume_align_i32_pow2_of_loaded_pointer_into_align_metadata(
+; CHECK-NEXT:    [[P2:%.*]] = load ptr, ptr [[P:%.*]], align 8, !align [[META0]]
+; CHECK-NEXT:    ret ptr [[P2]]
+;
+  %p2 = load ptr, ptr %p
+  call void @llvm.assume(i1 true) [ "align"(ptr %p2, i32 8) ]
+  ret ptr %p2
+}
+
 define ptr @dont_fold_assume_align_pow2_of_loaded_pointer_into_align_metadata_due_to_call(ptr %p) {
 ; CHECK-LABEL: @dont_fold_assume_align_pow2_of_loaded_pointer_into_align_metadata_due_to_call(
 ; CHECK-NEXT:    [[P2:%.*]] = load ptr, ptr [[P:%.*]], align 8
@@ -171,3 +179,19 @@ define ptr @dont_fold_assume_align_zero_of_loaded_pointer_into_align_metadata(pt
   call void @llvm.assume(i1 true) [ "align"(ptr %p2, i64 0) ]
   ret ptr %p2
 }
+
+; !align must have a constant integer alignment.
+define ptr @dont_fold_assume_align_not_constant_of_loaded_pointer_into_align_metadata(ptr %p, i64 %align) {
+; CHECK-LABEL: @dont_fold_assume_align_not_constant_of_loaded_pointer_into_align_metadata(
+; CHECK-NEXT:    [[P2:%.*]] = load ptr, ptr [[P:%.*]], align 8, !align [[META1:![0-9]+]]
+; CHECK-NEXT:    ret ptr [[P2]]
+;
+  %p2 = load ptr, ptr %p
+  call void @llvm.assume(i1 true) [ "align"(ptr %p2, i64 %align) ]
+  ret ptr %p2
+}
+
+;.
+; CHECK: [[META0]] = !{i64 8}
+; CHECK: [[META1]] = !{i64 1}
+;.
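For illustration (an editorial sketch, not part of the patch): the InstCombine change rewrites an "align" assume bundle whose argument is a loaded pointer into `!align` metadata on that load, as the assume-align.ll tests above check. A minimal before/after example:

```llvm
; Before instcombine: the alignment fact lives in an assume operand bundle.
define ptr @example(ptr %p) {
  %p2 = load ptr, ptr %p, align 8
  call void @llvm.assume(i1 true) [ "align"(ptr %p2, i64 8) ]
  ret ptr %p2
}

; After instcombine, the fact is attached to the load and the bundle is
; dropped from the assume:
;   %p2 = load ptr, ptr %p, align 8, !align !0
;   ret ptr %p2
; where !0 = !{i64 8}
```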
diff --git a/llvm/test/Transforms/PhaseOrdering/infer-align-from-assumption.ll b/llvm/test/Transforms/PhaseOrdering/infer-align-from-assumption.ll
new file mode 100644
index 000000000000000..91372795531dc8a
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/infer-align-from-assumption.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='default<O3>' -S %s | FileCheck %s
+
+target triple = "arm64-apple-macosx"
+
+declare void @llvm.assume(i1 noundef)
+
+define i32 @entry(ptr %0) {
+; CHECK-LABEL: define i32 @entry(
+; CHECK-SAME: ptr nocapture [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8, !align [[META0:![0-9]+]]
+; CHECK-NEXT:    [[DOT0_COPYLOAD_I_I_I:%.*]] = load i32, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @swap(i32 [[DOT0_COPYLOAD_I_I_I]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 4
+; CHECK-NEXT:    store ptr [[TMP5]], ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[DOT0_COPYLOAD_I_I_I1:%.*]] = load i32, ptr [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 @swap(i32 [[DOT0_COPYLOAD_I_I_I1]])
+; CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i64 4
+; CHECK-NEXT:    store ptr [[TMP8]], ptr [[TMP0]], align 8
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+  %2 = call i32 @fn1(ptr %0)
+  %3 = call i32 @fn1(ptr %0)
+  ret i32 %3
+}
+
+
+define i32 @fn1(ptr %0) {
+; CHECK-LABEL: define i32 @fn1(
+; CHECK-SAME: ptr nocapture [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8, !align [[META0]]
+; CHECK-NEXT:    [[DOT0_COPYLOAD_I_I:%.*]] = load i32, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @swap(i32 [[DOT0_COPYLOAD_I_I]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 4
+; CHECK-NEXT:    store ptr [[TMP5]], ptr [[TMP0]], align 8
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %2 = call i32 @fn2(ptr %0)
+  ret i32 %2
+}
+
+define i32 @fn2(ptr %0) {
+; CHECK-LABEL: define i32 @fn2(
+; CHECK-SAME: ptr nocapture [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8, !align [[META0]]
+; CHECK-NEXT:    [[DOT0_COPYLOAD_I:%.*]] = load i32, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @swap(i32 [[DOT0_COPYLOAD_I]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 4
+; CHECK-NEXT:    store ptr [[TMP5]], ptr [[TMP0]], align 8
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %2 = load ptr, ptr %0, align 8
+  %3 = call i32 @load_assume_aligned(ptr %2)
+  %4 = load ptr, ptr %0, align 8
+  %5 = getelementptr i8, ptr %4, i64 4
+  store ptr %5, ptr %0, align 8
+  ret i32 %3
+}
+
+define i32 @load_assume_aligned(ptr %0) {
+; CHECK-LABEL: define i32 @load_assume_aligned(
+; CHECK-SAME: ptr [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[DOT0_COPYLOAD:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @swap(i32 [[DOT0_COPYLOAD]])
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+  %.0.copyload = load i32, ptr %0, align 1
+  %2 = call i32 @swap(i32 %.0.copyload)
+  ret i32 %2
+}
+
+declare i32 @swap(i32)
+;.
+; CHECK: [[META0]] = !{i64 4}
+;.
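The phase-ordering test above exercises both halves of the patch: once @load_assume_aligned is inlined, the assumed pointer is the result of a load, so InstCombine folds the assume into `!align` metadata, which then survives further inlining into the callers. The EarlyCSE half runs in the opposite direction; an editorial sketch (mirroring materialize-align-assumptions.ll) of what it preserves:

```llvm
; Before EarlyCSE: %l.2 reloads the value just stored, so it is replaced by
; %gep; without the new code the !align fact on %l.2 would simply be dropped.
define ptr @sketch(ptr noalias %p) {
  %l.1 = load ptr, ptr %p, align 8
  %gep = getelementptr i8, ptr %l.1, i64 4
  store ptr %gep, ptr %p, align 8
  %l.2 = load ptr, ptr %p, align 8, !align !0, !noundef !{}
  ret ptr %l.2
}
!0 = !{i64 4}

; After EarlyCSE, the fact is rematerialized as an assumption on the
; replacement value:
;   call void @llvm.assume(i1 true) [ "align"(ptr %gep, i64 4) ]
;   ret ptr %gep
```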
diff --git a/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll b/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll
index b1cee80bde33fde..d57af87d1644744 100644
--- a/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll
@@ -35,8 +35,7 @@ define void @caller1(i1 %c, ptr align 1 %ptr) {
 ; ASSUMPTIONS-ON-NEXT:    br i1 [[C:%.*]], label [[COMMON_RET:%.*]], label [[FALSE2:%.*]]
 ; ASSUMPTIONS-ON:       common.ret:
 ; ASSUMPTIONS-ON-NEXT:    [[DOTSINK:%.*]] = phi i64 [ 3, [[FALSE2]] ], [ 2, [[TMP0:%.*]] ]
-; ASSUMPTIONS-ON-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 8) ]
-; ASSUMPTIONS-ON-NEXT:    store volatile i64 0, ptr [[PTR]], align 8
+; ASSUMPTIONS-ON-NEXT:    store volatile i64 0, ptr [[PTR:%.*]], align 8
 ; ASSUMPTIONS-ON-NEXT:    store volatile i64 -1, ptr [[PTR]], align 8
 ; ASSUMPTIONS-ON-NEXT:    store volatile i64 -1, ptr [[PTR]], align 8
 ; ASSUMPTIONS-ON-NEXT:    store volatile i64 -1, ptr [[PTR]], align 8
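A closing note on why the EarlyCSE half requires `!noundef` (or programUndefinedIfPoison): per LangRef, `!align` on a pointer load only turns a misaligned result into poison, whereas an alignment assumption that does not hold is immediate undefined behavior. Materializing the assume is therefore only sound when poison at the load would already imply UB, which is exactly what the *_missing_noundef test checks. An annotated example (editorial, not part of the patch):

```llvm
; !align alone: a misaligned loaded pointer is merely poison, so EarlyCSE
; must not introduce an assume for it when the load is CSE'd away.
; !align plus !noundef: a misaligned result would be immediate UB, so
; converting the fact into an unconditional assume preserves semantics.
define ptr @recap(ptr %p) {
  %q = load ptr, ptr %p, align 8, !align !0, !noundef !{}
  ret ptr %q
}
!0 = !{i64 8}
```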